Morphological variation of amazon and sailfin mollies

In this study, I am looking at various populations of amazon and sailfin mollies across their native/introduced range to assess morphological variation both within and among the species. I am using the Pickle fish collections for my samples.

All data

I will use all data first to see the trends, then filter for confirmed adult sizes (see “Filtered”).

Initial steps

Data collection

## Using libcurl 8.3.0 with Schannel

Checking normality

Checking normality

I will use Shapiro-wilke, histograms, and QQ plots to determine what traits are normal. These will only be performed on continuous variables, as discrete variables are not normal by nature.

Conclusions: literally all of them are NOT normal… will log transform them and double check normality. Had to square-root three traits () because still not normal with log transform.

SW Test
shapiro.test(raw1$SL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$SL
## W = 0.9763, p-value = 2.763e-05
shapiro.test(raw1$BD)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$BD
## W = 0.96287, p-value = 1.787e-07
shapiro.test(raw1$CPD)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$CPD
## W = 0.96431, p-value = 2.908e-07
shapiro.test(raw1$CPL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$CPL
## W = 0.97288, p-value = 6.831e-06
shapiro.test(raw1$PreDL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$PreDL
## W = 0.97924, p-value = 9.997e-05
shapiro.test(raw1$DbL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$DbL
## W = 0.97697, p-value = 3.68e-05
shapiro.test(raw1$HL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$HL
## W = 0.95327, p-value = 8.841e-09
shapiro.test(raw1$HD)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$HD
## W = 0.96761, p-value = 9.28e-07
shapiro.test(raw1$HW)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$HW
## W = 0.97201, p-value = 4.833e-06
shapiro.test(raw1$SnL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$SnL
## W = 0.97684, p-value = 3.477e-05
shapiro.test(raw1$OL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$OL
## W = 0.98631, p-value = 0.003102
####### WITHOUT MONSTER ########

shapiro.test(raw1b$SL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1b$SL
## W = 0.97628, p-value = 2.822e-05
shapiro.test(raw1b$BD)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1b$BD
## W = 0.96271, p-value = 1.754e-07
shapiro.test(raw1b$CPD)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1b$CPD
## W = 0.9646, p-value = 3.326e-07
shapiro.test(raw1b$CPL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1b$CPL
## W = 0.97267, p-value = 6.468e-06
shapiro.test(raw1b$PreDL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1b$PreDL
## W = 0.97945, p-value = 0.0001126
shapiro.test(raw1b$DbL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1b$DbL
## W = 0.97724, p-value = 4.257e-05
shapiro.test(raw1b$HL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1b$HL
## W = 0.95317, p-value = 8.955e-09
shapiro.test(raw1b$HD)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1b$HD
## W = 0.96763, p-value = 9.667e-07
shapiro.test(raw1b$HW)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1b$HW
## W = 0.9723, p-value = 5.603e-06
shapiro.test(raw1b$SnL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1b$SnL
## W = 0.97673, p-value = 3.419e-05
shapiro.test(raw1b$OL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1b$OL
## W = 0.98643, p-value = 0.003371
Histograms
hist(raw1$SL, breaks=30)

hist(raw1$BD, breaks=30)

hist(raw1$CPD, breaks=30)

hist(raw1$CPL, breaks=30)

hist(raw1$PreDL, breaks=30)

hist(raw1$DbL, breaks=30)

hist(raw1$HL, breaks=30)

hist(raw1$HD, breaks=30)

hist(raw1$HW, breaks=30)

hist(raw1$SnL, breaks=30)

hist(raw1$OL, breaks=30)

####### WITHOUT MONSTER ########

hist(raw1b$SL, breaks=30)

hist(raw1b$BD, breaks=30)

hist(raw1b$CPD, breaks=30)

hist(raw1b$CPL, breaks=30)

hist(raw1b$PreDL, breaks=30)

hist(raw1b$DbL, breaks=30)

hist(raw1b$HL, breaks=30)

hist(raw1b$HD, breaks=30)

hist(raw1b$HW, breaks=30)

hist(raw1b$SnL, breaks=30)

hist(raw1b$OL, breaks=30)

QQ plots
qqnorm(raw1$SL)
qqline(raw1$SL)

qqnorm(raw1$BD)
qqline(raw1$BD)

qqnorm(raw1$CPD)
qqline(raw1$CPD)

qqnorm(raw1$CPL)
qqline(raw1$CPL)

qqnorm(raw1$PreDL)
qqline(raw1$PreDL)

qqnorm(raw1$DbL)
qqline(raw1$DbL)

qqnorm(raw1$HL)
qqline(raw1$HL)

qqnorm(raw1$HD)
qqline(raw1$HD)

qqnorm(raw1$HW)
qqline(raw1$HW)

qqnorm(raw1$SnL)
qqline(raw1$SnL)

qqnorm(raw1$OL)
qqline(raw1$OL)

Log transformations

Since all of the continuous characters were not normal, I will log transform them, and proceed with the transformed data in all analyses.

raw1[paste0(names(raw1), '_log')] <- log(raw1[, 26:35], 10)
#for some reason did the log of the whole data frame instead of those specific columns, so I will just remove the unnecessary columns instead of spending time on what went wrong.

raw1 <- raw1[-c(37:60)]
raw1 <- raw1[-c(48)]

#double checking normality with a SW test and QQ plot

shapiro.test(raw1$SL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$SL_log
## W = 0.99271, p-value = 0.1056
shapiro.test(raw1$BD_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$BD_log
## W = 0.98772, p-value = 0.006571
shapiro.test(raw1$CPD_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$CPD_log
## W = 0.99285, p-value = 0.1144
shapiro.test(raw1$CPL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$CPL_log
## W = 0.99513, p-value = 0.3824
shapiro.test(raw1$PreDL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$PreDL_log
## W = 0.99436, p-value = 0.2588
shapiro.test(raw1$DbL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$DbL_log
## W = 0.98042, p-value = 0.0001711
shapiro.test(raw1$HL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$HL_log
## W = 0.99386, p-value = 0.1984
shapiro.test(raw1$HD_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$HD_log
## W = 0.99661, p-value = 0.7102
shapiro.test(raw1$HW_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$HW_log
## W = 0.99491, p-value = 0.3435
shapiro.test(raw1$SnL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$SnL_log
## W = 0.99366, p-value = 0.1788
shapiro.test(raw1$OL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$OL_log
## W = 0.99271, p-value = 0.1056
qqnorm(raw1$SL_log)
qqline(raw1$SL_log)

qqnorm(raw1$BD_log)
qqline(raw1$BD_log)

qqnorm(raw1$CPD_log)
qqline(raw1$CPD_log)

qqnorm(raw1$CPL_log)
qqline(raw1$CPL_log)

qqnorm(raw1$PreDL_log)
qqline(raw1$PreDL_log)

qqnorm(raw1$DbL_log)
qqline(raw1$DbL_log)

qqnorm(raw1$HL_log)
qqline(raw1$HL_log)

qqnorm(raw1$HD_log)
qqline(raw1$HD_log)

qqnorm(raw1$HW_log)
qqline(raw1$HW_log)

qqnorm(raw1$SnL_log)
qqline(raw1$SnL_log)

qqnorm(raw1$OL_log)
qqline(raw1$OL_log)

Need to try other transformations for body depth, predorsal length, and dorsal base length. Will try squareroot or cuberoot to see what gets them normal. Cubed got them closest to normal (bd still not normal but other two are).

#raw1$sqR_BD ='^'(raw1$BD,1/2)
#raw1$sqR_PreDL ='^'(raw1$PreDL,1/2)
#raw1$sqR_DbL ='^'(raw1$DbL,1/2)

#shapiro.test(raw1$sqR_BD) #didn't work
#shapiro.test(raw1$sqR_PreDL)#worked
#shapiro.test(raw1$sqR_DbL)#worked

#qqnorm(raw1$sqR_BD)
#qqline(raw1$sqR_BD)

#qqnorm(raw1$sqR_PreDL)
#qqline(raw1$sqR_PreDL)

#qqnorm(raw1$sqR_DbL)
#qqline(raw1$sqR_PreDL)

#cubed root
raw1$cbR_BD ='^'(raw1$BD,1/3)
raw1$cbR_PreDL ='^'(raw1$PreDL,1/3)
raw1$cbR_DbL ='^'(raw1$DbL,1/3)

shapiro.test(raw1$cbR_BD) #didn't work, but closest to normal
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$cbR_BD
## W = 0.9892, p-value = 0.01468
shapiro.test(raw1$cbR_PreDL)#worked
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$cbR_PreDL
## W = 0.9939, p-value = 0.2027
shapiro.test(raw1$cbR_DbL)#worked
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$cbR_DbL
## W = 0.99582, p-value = 0.5246
qqnorm(raw1$cbR_BD)
qqline(raw1$cbR_BD)

qqnorm(raw1$cbR_PreDL)
qqline(raw1$cbR_PreDL)

qqnorm(raw1$cbR_DbL)
qqline(raw1$cbR_PreDL)

#raw1 <- raw1[-c(48:50)]
#need to cube root SL to use in regressions

raw1$cbR_SL ='^'(raw1$SL,1/3)

Correcting for body size (residuals)

SL correction

Since amazons are in general bigger than sailfin, we don’t want any results to be due to this difference in body size bias. Therefore, we will see what traits are influenced by body size (regressions) and correct for body size when necessary (absolute value of residuals). We can then use the residuals when comparing between species for traits that are influenced by body size, and raw/transformed data for traits that are not influenced by body size. I will also calculate standardized residuals to compare residuals across traits in later analyses.

Quick results summary: traits not influenced by body size are left & right pelvic, anal, scales above and below lateral line (except in mexicana), scales before dorsal fin and fluctuating asymmetry; all other traits influenced by body size.

Regressions

  • STEP ONE: plot trait vs body size
library(ggplot2)
library(ggpubr)



lat <- raw1[raw1$SPP == "p.latipinna",]


form <- raw1[raw1$SPP == "p.formosa",]


mex <- raw1[raw1$SPP == "p.mexicana",]

###just want to see if body size is actually different between the species

t.test(lat$SL_log, form$SL_log, alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  lat$SL_log and form$SL_log
## t = 9.6224, df = 295.24, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.08951299 0.13554293
## sample estimates:
## mean of x mean of y 
## 0.9609465 0.8484185
t.test(lat$BD_log, form$BD_log, alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  lat$BD_log and form$BD_log
## t = -1.3924, df = 294.83, p-value = 0.1649
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.030362183  0.005201163
## sample estimates:
## mean of x mean of y 
## 0.9296190 0.9421995
t.test(lat$SL_log, mex$SL_log, alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  lat$SL_log and mex$SL_log
## t = 19.62, df = 63.026, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.2580037 0.3165191
## sample estimates:
## mean of x mean of y 
## 0.9609465 0.6736851
t.test(lat$BD_log, mex$BD_log, alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  lat$BD_log and mex$BD_log
## t = 0.80331, df = 59.941, p-value = 0.425
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.01404771  0.03290210
## sample estimates:
## mean of x mean of y 
## 0.9296190 0.9201918
t.test(mex$SL_log, form$SL_log, alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  mex$SL_log and form$SL_log
## t = -11.892, df = 64.41, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.2040836 -0.1453833
## sample estimates:
## mean of x mean of y 
## 0.6736851 0.8484185
t.test(mex$BD_log, form$BD_log, alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  mex$BD_log and form$BD_log
## t = -1.8729, df = 60.675, p-value = 0.0659
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.045506633  0.001491221
## sample estimates:
## mean of x mean of y 
## 0.9201918 0.9421995
##### LAT #####
reg.lat.D <- lm(lat$D ~ lat$SL)
sd.lat.D <- rstandard(reg.lat.D)
reg.lat.D.plot <- ggplot(lat, aes(x = SL, y = D)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.D.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.P1 <- lm(lat$P1 ~ lat$SL)
sd.lat.P1 <- rstandard(reg.lat.P1)
reg.lat.P1.plot <- ggplot(lat, aes(x = SL, y = P1)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.P1.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.A <- lm(lat$A ~ lat$SL)
sd.lat.A <- rstandard(reg.lat.A)
reg.lat.A.plot <- ggplot(lat, aes(x = SL, y = A)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.A.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.P1.R <- lm(lat$P1.R ~ lat$SL)
sd.lat.P1.R <- rstandard(reg.lat.P1.R)
reg.lat.P1.R.plot <- ggplot(lat, aes(x = SL, y = P1.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.P1.R.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.LLSC <- lm(lat$LLSC ~ lat$SL)
sd.lat.LLSC <- rstandard(reg.lat.LLSC)
reg.lat.LLSC.plot <- ggplot(lat, aes(x = SL, y = LLSC)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.LLSC.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.SALL <- lm(lat$SALL ~ lat$SL)
sd.lat.SALL <- rstandard(reg.lat.SALL)
reg.lat.SALL.plot <- ggplot(lat, aes(x = SL, y = SALL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.SALL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.SBLL <- lm(lat$SBLL ~ lat$SL)
sd.lat.SBLL <- rstandard(reg.lat.SBLL)
reg.lat.SBLL.plot <- ggplot(lat, aes(x = SL, y = SBLL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.SBLL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.SBDF <- lm(lat$SBDF ~ lat$SL)
sd.lat.SBDF <- rstandard(reg.lat.SBDF)
reg.lat.SBDF.plot <- ggplot(lat, aes(x = SL, y = SBDF)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.SBDF.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.BD <- lm(lat$cbR_BD ~ lat$cbR_SL)
sd.lat.BD <- rstandard(reg.lat.BD)
reg.lat.BD.plot <- ggplot(lat, aes(x = cbR_SL, y = cbR_BD)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.BD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.CPD <- lm(lat$CPD_log ~ lat$SL_log)
sd.lat.CPD <- rstandard(reg.lat.CPD)
reg.lat.CPD.plot <- ggplot(lat, aes(x = SL_log, y = CPD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.CPD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.CPL <- lm(lat$CPL_log ~ lat$SL_log)
sd.lat.CPL <- rstandard(reg.lat.CPL)
reg.lat.CPL.plot <- ggplot(lat, aes(x = SL_log, y = CPL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.CPL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.PreDL <- lm(lat$cbR_PreDL ~ lat$cbR_SL)
sd.lat.PreDL <- rstandard(reg.lat.PreDL)
reg.lat.PreDL.plot <- ggplot(lat, aes(x = cbR_SL, y = cbR_PreDL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.PreDL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.DbL <- lm(lat$cbR_DbL ~ lat$cbR_SL)
sd.lat.DbL <- rstandard(reg.lat.DbL)
reg.lat.DbL.plot <- ggplot(lat, aes(x = cbR_SL, y = cbR_DbL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.DbL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.HL <- lm(lat$HL_log ~ lat$SL_log)
sd.lat.HL <- rstandard(reg.lat.HL)
reg.lat.HL.plot <- ggplot(lat, aes(x = SL_log, y = HL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.HL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.HD <- lm(lat$HD_log ~ lat$SL_log)
sd.lat.HD <- rstandard(reg.lat.HD)
reg.lat.HD.plot <- ggplot(lat, aes(x = SL_log, y = HD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.HD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.HW <- lm(lat$HW_log ~ lat$SL_log)
sd.lat.HW <- rstandard(reg.lat.HW)
reg.lat.HW.plot <- ggplot(lat, aes(x = SL_log, y = HW_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.HW.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.SnL <- lm(lat$SnL_log ~ lat$SL_log)
sd.lat.SnL <- rstandard(reg.lat.SnL)
reg.lat.SnL.plot <- ggplot(lat, aes(x = SL_log, y = SnL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.SnL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.OL <- lm(lat$OL_log ~ lat$SL_log)
sd.lat.OL <- rstandard(reg.lat.OL)
reg.lat.OL.plot <- ggplot(lat, aes(x = SL_log, y = OL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.OL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.FLA <- lm(lat$FLA ~ lat$SL)
sd.lat.FLA <- rstandard(reg.lat.FLA)
reg.lat.FLA.plot <- ggplot(lat, aes(x = SL, y = FLA)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.FLA.plot
## `geom_smooth()` using formula = 'y ~ x'

##### FORM #####

reg.form.D <- lm(form$D ~ form$SL)
sd.form.D <- rstandard(reg.form.D)
reg.form.D.plot <- ggplot(form, aes(x =SL, y = D)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.D.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.P1 <- lm(form$P1 ~ form$SL)
sd.form.P1 <- rstandard(reg.form.P1)
reg.form.P1.plot <- ggplot(form, aes(x = SL, y = P1)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.P1.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.A <- lm(form$A ~ form$SL)
sd.form.A <- rstandard(reg.form.A)
reg.form.A.plot <- ggplot(form, aes(x = SL, y = A)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.A.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.P1.R <- lm(form$P1.R ~ form$SL)
sd.form.P1.R <- rstandard(reg.form.P1.R)
reg.form.P1.R.plot <- ggplot(form, aes(x = SL, y = P1.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.P1.R.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.LLSC <- lm(form$LLSC ~ form$SL)
sd.form.LLSC <- rstandard(reg.form.LLSC)
reg.form.LLSC.plot <- ggplot(form, aes(x = SL, y = LLSC)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.LLSC.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.SALL <- lm(form$SALL ~ form$SL)
sd.form.SALL <- rstandard(reg.form.SALL)
reg.form.SALL.plot <- ggplot(form, aes(x = SL, y = SALL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.SALL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.SBLL <- lm(form$SBLL ~ form$SL)
sd.form.SBLL <- rstandard(reg.form.SBLL)
reg.form.SBLL.plot <- ggplot(form, aes(x = SL, y = SBLL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.SBLL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.SBDF <- lm(form$SBDF ~ form$SL)
sd.form.SBDF <- rstandard(reg.form.SBDF)
reg.form.SBDF.plot <- ggplot(form, aes(x = SL, y = SBDF)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.SBDF.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.BD <- lm(form$cbR_BD ~ form$cbR_SL)
sd.form.BD <- rstandard(reg.form.BD)
reg.form.BD.plot <- ggplot(form, aes(x = cbR_SL, y = cbR_BD)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.BD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.CPD <- lm(form$CPD_log ~ form$SL_log)
sd.form.CPD <- rstandard(reg.form.CPD)
reg.form.CPD.plot <- ggplot(form, aes(x = SL_log, y = CPD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.CPD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.CPL <- lm(form$CPL_log ~ form$SL_log)
sd.form.CPL <- rstandard(reg.form.CPL)
reg.form.CPL.plot <- ggplot(form, aes(x = SL_log, y = CPL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.CPL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.PreDL <- lm(form$cbR_PreDL ~ form$cbR_SL)
sd.form.PreDL <- rstandard(reg.form.PreDL)
reg.form.PreDL.plot <- ggplot(form, aes(x = cbR_SL, y = cbR_PreDL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.PreDL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.DbL <- lm(form$cbR_DbL ~ form$cbR_SL)
sd.form.DbL <- rstandard(reg.form.DbL)
reg.form.DbL.plot <- ggplot(form, aes(x = cbR_SL, y = cbR_DbL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.DbL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.HL <- lm(form$HL_log ~ form$SL_log)
sd.form.HL <- rstandard(reg.form.HL)
reg.form.HL.plot <- ggplot(form, aes(x = SL_log, y = HL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.HL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.HD <- lm(form$HD_log ~ form$SL_log)
sd.form.HD <- rstandard(reg.form.HD)
reg.form.HD.plot <- ggplot(form, aes(x = SL_log, y = HD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.HD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.HW <- lm(form$HW_log ~ form$SL_log)
sd.form.HW <- rstandard(reg.form.HW)
reg.form.HW.plot <- ggplot(form, aes(x = SL_log, y = HW_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.HW.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.SnL <- lm(form$SnL_log ~ form$SL_log)
sd.form.SnL <- rstandard(reg.form.SnL)
reg.form.SnL.plot <- ggplot(form, aes(x = SL_log, y = SnL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.SnL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.OL <- lm(form$OL_log ~ form$SL_log)
sd.form.OL <- rstandard(reg.form.OL)
reg.form.OL.plot <- ggplot(form, aes(x = SL_log, y = OL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.OL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.FLA <- lm(form$FLA ~ form$SL)
sd.form.FLA <- rstandard(reg.form.FLA)
reg.form.FLA.plot <- ggplot(form, aes(x = SL, y = FLA)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.FLA.plot
## `geom_smooth()` using formula = 'y ~ x'

##### MEX #####
reg.mex.D <- lm(mex$D ~ mex$SL)
sd.mex.D <- rstandard(reg.mex.D)
reg.mex.D.plot <- ggplot(mex, aes(x = SL, y = D)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.D.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.P1 <- lm(mex$P1 ~ mex$SL)
sd.mex.P1 <- rstandard(reg.mex.P1)
reg.mex.P1.plot <- ggplot(mex, aes(x = SL, y = P1)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.P1.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.A <- lm(mex$A ~ mex$SL)
sd.mex.A <- rstandard(reg.mex.A)
reg.mex.A.plot <- ggplot(mex, aes(x = SL, y = A)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.A.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.P1.R <- lm(mex$P1.R ~ mex$SL)
sd.mex.P1.R <- rstandard(reg.mex.P1.R)
reg.mex.P1.R.plot <- ggplot(mex, aes(x = SL, y = P1.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.P1.R.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.LLSC <- lm(mex$LLSC ~ mex$SL)
sd.mex.LLSC <- rstandard(reg.mex.LLSC)
reg.mex.LLSC.plot <- ggplot(mex, aes(x = SL, y = LLSC)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.LLSC.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.SALL <- lm(mex$SALL ~ mex$SL)
sd.mex.SALL <- rstandard(reg.mex.SALL)
reg.mex.SALL.plot <- ggplot(mex, aes(x = SL, y = SALL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.SALL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.SBLL <- lm(mex$SBLL ~ mex$SL)
sd.mex.SBLL <- rstandard(reg.mex.SBLL)
reg.mex.SBLL.plot <- ggplot(mex, aes(x = SL, y = SBLL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.SBLL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.SBDF <- lm(mex$SBDF ~ mex$SL)
sd.mex.SBDF <- rstandard(reg.mex.SBDF)
reg.mex.SBDF.plot <- ggplot(mex, aes(x = SL, y = SBDF)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.SBDF.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.BD <- lm(mex$cbR_BD ~ mex$cbR_SL)
sd.mex.BD <- rstandard(reg.mex.BD)
reg.mex.BD.plot <- ggplot(mex, aes(x = cbR_SL, y = cbR_BD)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.BD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.CPD <- lm(mex$CPD_log ~ mex$SL_log)
sd.mex.CPD <- rstandard(reg.mex.CPD)
reg.mex.CPD.plot <- ggplot(mex, aes(x = SL_log, y = CPD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.CPD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.CPL <- lm(mex$CPL_log ~ mex$SL_log)
sd.mex.CPL <- rstandard(reg.mex.CPL)
reg.mex.CPL.plot <- ggplot(mex, aes(x = SL_log, y = CPL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.CPL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.PreDL <- lm(mex$cbR_PreDL ~ mex$cbR_SL)
sd.mex.PreDL <- rstandard(reg.mex.PreDL)
reg.mex.PreDL.plot <- ggplot(mex, aes(x = cbR_SL, y = cbR_PreDL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.PreDL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.DbL <- lm(mex$cbR_DbL ~ mex$cbR_SL)
sd.mex.DbL <- rstandard(reg.mex.DbL)
reg.mex.DbL.plot <- ggplot(mex, aes(x = cbR_SL, y = cbR_DbL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.DbL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.HL <- lm(mex$HL_log ~ mex$SL_log)
sd.mex.HL <- rstandard(reg.mex.HL)
reg.mex.HL.plot <- ggplot(mex, aes(x = SL_log, y = HL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.HL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.HD <- lm(mex$HD_log ~ mex$SL_log)
sd.mex.HD <- rstandard(reg.mex.HD)
reg.mex.HD.plot <- ggplot(mex, aes(x = SL_log, y = HD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.HD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.HW <- lm(mex$HW_log ~ mex$SL_log)
sd.mex.HW <- rstandard(reg.mex.HW)
reg.mex.HW.plot <- ggplot(mex, aes(x = SL_log, y = HW_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.HW.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.SnL <- lm(mex$SnL_log ~ mex$SL_log)
sd.mex.SnL <- rstandard(reg.mex.SnL)
reg.mex.SnL.plot <- ggplot(mex, aes(x = SL_log, y = SnL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.SnL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.OL <- lm(mex$OL_log ~ mex$SL_log)
sd.mex.OL <- rstandard(reg.mex.OL)
reg.mex.OL.plot <- ggplot(mex, aes(x = SL_log, y = OL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.OL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.FLA <- lm(mex$FLA ~ mex$SL)
sd.mex.FLA <- rstandard(reg.mex.FLA)
reg.mex.FLA.plot <- ggplot(mex, aes(x = SL, y = FLA)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.FLA.plot
## `geom_smooth()` using formula = 'y ~ x'

Residuals

  • STEP TWO: get residuals for each individual for traits that were influenced by body size

  • STEP THREE: convert residuals to absolute value

##### LAT #####

abs.lat.D <- abs(res.lat.D)
mean(abs.lat.D)
## [1] 0.5375505
abs.lat.P1 <- abs(res.lat.P1)
mean(abs.lat.P1)
## [1] 0.5466667
abs.lat.P1.R <- abs(res.lat.P1.R)
mean(abs.lat.P1.R)
## [1] 0.5799393
abs.lat.LLSC <- abs(res.lat.LLSC)
mean(abs.lat.LLSC)
## [1] 0.7664044
abs.lat.SBLL <- abs(res.lat.SBLL)
mean(abs.lat.SBLL)
## [1] 0.2461336
abs.lat.BD <- abs(res.lat.BD)
mean(abs.lat.BD)
## [1] 0.04778326
abs.lat.CPD <- abs(res.lat.CPD)
mean(abs.lat.CPD)
## [1] 0.03695739
abs.lat.CPL <- abs(res.lat.CPL)
mean(abs.lat.CPL)
## [1] 0.03732998
abs.lat.PreDL <- abs(res.lat.PreDL)
mean(abs.lat.PreDL)
## [1] 0.02707836
abs.lat.DbL <- abs(res.lat.DbL)
mean(abs.lat.DbL)
## [1] 0.0516366
abs.lat.HL <- abs(res.lat.HL)
mean(abs.lat.HL)
## [1] 0.03588147
abs.lat.HD <- abs(res.lat.HD)
mean(abs.lat.HD)
## [1] 0.03506907
abs.lat.HW <- abs(res.lat.HW)
mean(abs.lat.HW)
## [1] 0.03208151
abs.lat.SnL <- abs(res.lat.SnL)
mean(abs.lat.SnL)
## [1] 0.03208694
abs.lat.OL <- abs(res.lat.OL)
mean(abs.lat.OL)
## [1] 1.416081e-17
##### FORM #####

abs.form.D <- abs(res.form.D)
mean(abs.form.D)
## [1] 0.5668177
abs.form.P1 <- abs(res.form.P1)
mean(abs.form.P1)
## [1] 0.4843616
abs.form.P1.R <- abs(res.form.P1.R)
mean(abs.form.P1.R)
## [1] 0.4242033
abs.form.LLSC <- abs(res.form.LLSC)
mean(abs.form.LLSC)
## [1] 0.9038801
abs.form.SBLL <- abs(res.form.SBLL)
mean(abs.form.SBLL)
## [1] 0.3399272
abs.form.BD <- abs(res.form.BD)
mean(abs.form.BD)
## [1] 0.04710005
abs.form.CPD <- abs(res.form.CPD)
mean(abs.form.CPD)
## [1] 0.03046568
abs.form.CPL <- abs(res.form.CPL)
mean(abs.form.CPL)
## [1] 0.03738523
abs.form.PreDL <- abs(res.form.PreDL)
mean(abs.form.PreDL)
## [1] 0.02618636
abs.form.DbL <- abs(res.form.DbL)
mean(abs.form.DbL)
## [1] 0.05067905
abs.form.HL <- abs(res.form.HL)
mean(abs.form.HL)
## [1] 0.03919057
abs.form.HD <- abs(res.form.HD)
mean(abs.form.HD)
## [1] 0.03507274
abs.form.HW <- abs(res.form.HW)
mean(abs.form.HW)
## [1] 0.03744209
abs.form.SnL <- abs(res.form.SnL)
mean(abs.form.SnL)
## [1] 0.03321741
abs.form.OL <- abs(res.form.OL)
mean(abs.form.OL)
## [1] 6.018764e-18
##### MEX #####

abs.mex.D <- abs(res.mex.D)
mean(abs.mex.D)
## [1] 0.1657275
abs.mex.P1 <- abs(res.mex.P1)
mean(abs.mex.P1)
## [1] 0.5686425
abs.mex.P1.R <- abs(res.mex.P1.R)
mean(abs.mex.P1.R)
## [1] 0.458723
abs.mex.LLSC <- abs(res.mex.LLSC)
mean(abs.mex.LLSC)
## [1] 0.4434954
abs.mex.SBLL <- abs(res.mex.SBLL)
mean(abs.mex.SBLL)
## [1] 0.2713231
abs.mex.BD <- abs(res.mex.BD)
mean(abs.mex.BD)
## [1] 0.04568114
abs.mex.CPD <- abs(res.mex.CPD)
mean(abs.mex.CPD)
## [1] 0.03277006
abs.mex.CPL <- abs(res.mex.CPL)
mean(abs.mex.CPL)
## [1] 0.03878753
abs.mex.PreDL <- abs(res.mex.PreDL)
mean(abs.mex.PreDL)
## [1] 0.06019307
abs.mex.DbL <- abs(res.mex.DbL)
mean(abs.mex.DbL)
## [1] 0.04686893
abs.mex.HL <- abs(res.mex.HL)
mean(abs.mex.HL)
## [1] 0.05562721
abs.mex.HD <- abs(res.mex.HD)
mean(abs.mex.HD)
## [1] 0.03668815
abs.mex.HW <- abs(res.mex.HW)
mean(abs.mex.HW)
## [1] 0.03882769
abs.mex.SnL <- abs(res.mex.SnL)
mean(abs.mex.SnL)
## [1] 0.04900589
abs.mex.OL <- abs(res.mex.OL)
mean(abs.mex.OL)
## [1] 4.001035e-18
#let's get this into the raw1 data set so that we can plot this more easily

abs.res.D <- c(abs.lat.D, abs.form.D, abs.mex.D)
abs.res.P1 <- c(abs.lat.P1, abs.form.P1, abs.mex.P1)
abs.res.P1.R <- c(abs.lat.P1.R, abs.form.P1.R, abs.mex.P1.R)
abs.res.LLSC<- c(abs.lat.LLSC, abs.form.LLSC, abs.mex.LLSC)
abs.res.SBLL<- c(abs.lat.SBLL, abs.form.SBLL, abs.mex.SBLL)
abs.res.BD<- c(abs.lat.BD, abs.form.BD, abs.mex.BD)
abs.res.CPD<- c(abs.lat.CPD, abs.form.CPD, abs.mex.CPD)
abs.res.CPL<- c(abs.lat.CPL, abs.form.CPL, abs.mex.CPL)
abs.res.PreDL <- c(abs.lat.PreDL, abs.form.PreDL, abs.mex.PreDL)
abs.res.DbL <- c(abs.lat.DbL, abs.form.DbL, abs.mex.DbL)
abs.res.HL<- c(abs.lat.HL, abs.form.HL, abs.mex.HL)
abs.res.HD<- c(abs.lat.HD, abs.form.HD, abs.mex.HD)
abs.res.HW <- c(abs.lat.HW, abs.form.HW, abs.mex.HW)
abs.res.SnL <- c(abs.lat.SnL, abs.form.SnL, abs.mex.SnL)
abs.res.OL <- c(abs.lat.OL, abs.form.OL, abs.mex.OL)


raw2 <- cbind(raw1, abs.res.D, abs.res.P1, abs.res.P1.R, abs.res.LLSC, abs.res.SBLL, abs.res.BD, abs.res.CPD, abs.res.CPL, abs.res.PreDL, abs.res.DbL, abs.res.HL, abs.res.HD, abs.res.HW, abs.res.SnL, abs.res.OL)

Residual Comparisons

  • STEP FOUR: plot the mean of the |residuals| for both species, for all traits influenced by body size
library(ggbeeswarm)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
ggplot(raw2, aes(SPP, abs.res.D)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

scatter_violin <- ggplot(data=raw2, aes(x=SPP, y=abs.res.D)) +
  geom_violin(trim = FALSE) + 
  stat_summary(
    fun.data = "mean_sdl",  fun.args = list(mult = 1),
    geom = "pointrange", color = "black"
    )

print(scatter_violin)

scatter_violin1 <- ggplot(data=raw2, aes(x=SPP, y=abs.res.D)) +
  geom_violin(trim = FALSE) + 
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="crossbar", fill="red", width=0.03)

print(scatter_violin1)

ggplot(raw2, aes(SPP, abs.res.P1)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.P1.R)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3) 

ggplot(raw2, aes(SPP, abs.res.LLSC)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.SBLL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.BD)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.CPD)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.CPL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3) 

ggplot(raw2, aes(SPP, abs.res.PreDL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.DbL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.HL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.HD)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.HW)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.SnL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.OL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

Comparing variation

Variance tests

  1. t-test for residuals, continuous (do an f-test too, might be cool)
  2. residual discrete will go through glmer
  3. raw discrete will go through f-test (no idea how to do this otherwise), and results will be compared to visualizations to check for discrepencies.

(note: did non-parametric instead of transforming, but Mike said to transform, so I copied that work to the ‘morphology-analysis-final’ Rmd).

F-test on residuals doesn’t make much sense; the residuals themselves are representative of the variation present, as they are the distance from the mean. Therefore, F-test on residuals is like variance of the variance. Instead, I have to do a t-test on the absolute value of the residuals. In this sense, we want to see if the mean of the absolute residuals is higher or lower for asexual species–is the average amount of variation higher or lower for this trait? Based on the regressions, if the trait was influenced by body size, I will perform a t-test on the absolute value of the residuals. If the trait was not influenced by body size, I will perform an F-test of variance on the raw data.

Quick results summary: For the F-test on raw data, none of the traits were significantly different (P2L/R, A, SBDF, FLA). For the t-tests on residuals, the only significant traits are left pectoral (), right pectoral (lat>form), scales above lateral line (), scales below lateral line (form>lat), and head length ().

-   will do two-tailed and check out the residual means to infer direction; for traits in which we use raw data, a one-tailed f-test will be perfomed in both direction to determine which species is varying more. We will also visulize the variation using a histogram to confirm direction results.

(1) Continuous, residuals

  • will do two-tailed and check out the means to infer direction (see beginning for trait means w/o body correction)
  • this will be for the traits that did have a significant regression compared to SL; will be using the absolute value residuals.
  • For continuous traits, we will be using the transformed data.
T-tests
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.BD and abs.form.BD
## t = 0.16042, df = 287.17, p-value = 0.8727
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.007699582  0.009066010
## sample estimates:
##  mean of x  mean of y 
## 0.04778326 0.04710005
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.CPD and abs.form.CPD
## t = 2.1468, df = 282.5, p-value = 0.03266
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.0005394989 0.0124439196
## sample estimates:
##  mean of x  mean of y 
## 0.03695739 0.03046568
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.CPD and abs.form.CPD
## t = 2.1468, df = 282.5, p-value = 0.01633
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  0.001501493         Inf
## sample estimates:
##  mean of x  mean of y 
## 0.03695739 0.03046568
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.CPD and abs.form.CPD
## t = 2.1468, df = 282.5, p-value = 0.9837
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##        -Inf 0.01148193
## sample estimates:
##  mean of x  mean of y 
## 0.03695739 0.03046568
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.CPL and abs.form.CPL
## t = -0.015652, df = 291.31, p-value = 0.9875
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.007002590  0.006892088
## sample estimates:
##  mean of x  mean of y 
## 0.03732998 0.03738523
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.PreDL and abs.form.PreDL
## t = 0.37319, df = 279.25, p-value = 0.7093
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.003813117  0.005597125
## sample estimates:
##  mean of x  mean of y 
## 0.02707836 0.02618636
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.DbL and abs.form.DbL
## t = 0.19254, df = 284.63, p-value = 0.8475
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.00883155  0.01074664
## sample estimates:
##  mean of x  mean of y 
## 0.05163660 0.05067905
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.HL and abs.form.HL
## t = -0.90175, df = 296.34, p-value = 0.3679
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.010530987  0.003912795
## sample estimates:
##  mean of x  mean of y 
## 0.03588147 0.03919057
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.HD and abs.form.HD
## t = -0.00099329, df = 267.38, p-value = 0.9992
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.007275651  0.007268313
## sample estimates:
##  mean of x  mean of y 
## 0.03506907 0.03507274
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.HW and abs.form.HW
## t = -1.6871, df = 297.98, p-value = 0.09263
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.0116134871  0.0008923157
## sample estimates:
##  mean of x  mean of y 
## 0.03208151 0.03744209
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.SnL and abs.form.SnL
## t = -0.38886, df = 295.99, p-value = 0.6977
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.006851803  0.004590862
## sample estimates:
##  mean of x  mean of y 
## 0.03208694 0.03321741
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.OL and abs.form.OL
## t = 1.318, df = 152.51, p-value = 0.1895
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.062607e-18  2.034670e-17
## sample estimates:
##    mean of x    mean of y 
## 1.416081e-17 6.018764e-18

(2) Discrete, residuals

Logan suggested doing a glm with poisson distribution (because it is count data) for these relationships. I originally performed Mann Whitney U tests, so will also include that. Overall, results don’t differ even though exact values are slightly different.

Had to remove the poisson part, becausethe residuals didn’t meet the non-integer requirement for poisson (it freaked out and took forever to load).

GLMs
raw3 <- raw2[raw2$SPP !="p.mexicana", ]

glm_D <- glm(abs.res.D~SPP, data=raw3)
print(summary(glm_D), show.residuals=TRUE)
## 
## Call:
## glm(formula = abs.res.D ~ SPP, data = raw3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.5474  -0.3920  -0.1006   0.3170   1.6061  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.56682    0.03151  17.989   <2e-16 ***
## SPPp.latipinna -0.02927    0.04715  -0.621    0.535    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1648051)
## 
##     Null deviance: 49.175  on 299  degrees of freedom
## Residual deviance: 49.112  on 298  degrees of freedom
## AIC: 314.46
## 
## Number of Fisher Scoring iterations: 2
D.aov <- aov(glm_D)
summary(D.aov)
##              Df Sum Sq Mean Sq F value Pr(>F)
## SPP           1   0.06 0.06351   0.385  0.535
## Residuals   298  49.11 0.16481
glm_P1 <- glm(abs.res.P1~SPP, data=raw3)
print(summary(glm_P1), show.residuals=TRUE)
## 
## Call:
## glm(formula = abs.res.P1 ~ SPP, data = raw3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.5006  -0.3420  -0.1328   0.2626   2.9146  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.48436    0.03362  14.406   <2e-16 ***
## SPPp.latipinna  0.06231    0.05031   1.238    0.217    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1876585)
## 
##     Null deviance: 56.210  on 299  degrees of freedom
## Residual deviance: 55.922  on 298  degrees of freedom
## AIC: 353.42
## 
## Number of Fisher Scoring iterations: 2
P1.aov <- aov(glm_P1)
summary(P1.aov)
##              Df Sum Sq Mean Sq F value Pr(>F)
## SPP           1   0.29  0.2878   1.534  0.217
## Residuals   298  55.92  0.1877
glm_P1.R <- glm(abs.res.P1.R~SPP, data=raw3)
print(summary(glm_P1.R), show.residuals=TRUE)
## 
## Call:
## glm(formula = abs.res.P1.R ~ SPP, data = raw3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.5705  -0.2933  -0.1676   0.1833   1.6538  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.42420    0.03055  13.887  < 2e-16 ***
## SPPp.latipinna  0.15574    0.04571   3.407 0.000746 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1548898)
## 
##     Null deviance: 47.955  on 299  degrees of freedom
## Residual deviance: 46.157  on 298  degrees of freedom
## AIC: 295.84
## 
## Number of Fisher Scoring iterations: 2
P1.R.aov <- aov(glm_P1.R)
summary(P1.R.aov)
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## SPP           1   1.80  1.7983   11.61 0.000746 ***
## Residuals   298  46.16  0.1549                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
glm_LLSC <- glm(abs.res.LLSC~SPP, data=raw3)
print(summary(glm_LLSC), show.residuals=TRUE)
## 
## Call:
## glm(formula = abs.res.LLSC ~ SPP, data = raw3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9022  -0.4792  -0.2428   0.4210   3.5649  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.90388    0.05395  16.755   <2e-16 ***
## SPPp.latipinna -0.13748    0.08072  -1.703   0.0896 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.4830762)
## 
##     Null deviance: 145.36  on 299  degrees of freedom
## Residual deviance: 143.96  on 298  degrees of freedom
## AIC: 637.08
## 
## Number of Fisher Scoring iterations: 2
LLSC.aov <- aov(glm_LLSC)
summary(LLSC.aov)
##              Df Sum Sq Mean Sq F value Pr(>F)  
## SPP           1    1.4  1.4013   2.901 0.0896 .
## Residuals   298  144.0  0.4831                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(3) Discrete, raw

EVEN THOUGH THESE ARE DISCRETE, NO IDEA WHAT ELSE TO DO WITH THEM (glmm on raw data will just compare means). Did both F-test and Levene’s test. Get the same overall results, albeit slightly different values. Might go with Levene’s since it is good with non-normal data.

This will only be for the traits that did not have a significant regression compared to SL (so P2s, A, SALL, SBDF, and FLA); none of these traits were transformed, so just doing raw for now.

Levene’s test

Still only performing this on the traits that did NOT vary with SL (P2L/R, A, SALL, SBLL [between sail and amz only], SBDF, FLA).

library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(carData)

(LT_P2L <- leveneTest(P2.L~SPP, data=raw3)) #gives nothing since it's all the same value
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1     NaN    NaN
##       298
(LT_P2R <- leveneTest(P2.R~SPP, data=raw3)) #same as above
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1     NaN    NaN
##       298
(LT_A <- leveneTest(A~SPP, data=raw3)) 
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.3962 0.5295
##       298
(LT_SALL <- leveneTest(SALL~SPP, data=raw3))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value  Pr(>F)  
## group   1  3.7062 0.05516 .
##       298                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(LT_SBLL <- leveneTest(SBLL~SPP, data=raw3))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)  
## group   1    3.36 0.0678 .
##       298                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(LT_SBDF <- leveneTest(SBDF~SPP, data=raw3))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.0081 0.9283
##       298
(LT_FLA <- leveneTest(FLA~SPP, data=raw3))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  2.7164 0.1004
##       298

Summary of variance results

WHY WON’T THIS TABLE WORK!!!!!!!!

average.table <- matrix(c(MW_D\(p.value, mean(abs.lat.D, na.rm = TRUE), mean(abs.form.D, na.rm = TRUE), MW_P1\)p.value, mean(abs.lat.P1, na.rm = TRUE), mean(abs.form.P1, na.rm = TRUE), LT_P2R\("Pr(>F)", mean(lat\)P2.R, na.rm = TRUE), mean(form\(P2.R, na.rm = TRUE), LT_P2R\)“Pr(>F)”, mean(lat\(P2.L, na.rm = TRUE), mean(form\)P2.L, na.rm = TRUE), LT_A\("Pr(>F)", mean(lat\)A, na.rm = TRUE), mean(form\(A, na.rm = TRUE), MW_P1R\)p.value, mean(abs.lat.P1.R, na.rm = TRUE), mean(abs.form.P1.R, na.rm = TRUE), MW_LLSC\(p.value, mean(abs.lat.LLSC, na.rm = TRUE), mean(abs.form.LLSC, na.rm = TRUE), LT_SALL\)“Pr(>F)”, mean(lat\(SALL, na.rm = TRUE), mean(form\)SALL, na.rm = TRUE), LT_SBLL\("Pr(>F)", mean(abs.lat.SBLL, na.rm = TRUE), mean(abs.form.SBLL, na.rm = TRUE), LT_SBDF\)“Pr(>F)”, mean(lat\(SBDF, na.rm = TRUE), mean(form\)SBDF, na.rm = TRUE), ttest.abs.BD\(p.value, mean(abs.lat.BD, na.rm = TRUE), mean(abs.form.BD, na.rm = TRUE), ttest.abs.CPD\)p.value, mean(abs.lat.CPD, na.rm = TRUE), mean(abs.form.CPD, na.rm = TRUE), ttest.abs.CPL\(p.value, mean(abs.lat.CPL, na.rm = TRUE), mean(abs.form.CPL, na.rm = TRUE), ttest.abs.PreDL\)p.value, mean(abs.lat.PreDL, na.rm = TRUE), mean(abs.form.PreDL, na.rm = TRUE), ttest.abs.DbL\(p.value, mean(abs.lat.DbL, na.rm = TRUE), mean(abs.form.DbL, na.rm = TRUE), ttest.abs.HL\)p.value, mean(abs.lat.HL, na.rm = TRUE), mean(abs.form.HL, na.rm = TRUE), ttest.abs.HD\(p.value, mean(abs.lat.HD, na.rm = TRUE), mean(abs.form.HD, na.rm = TRUE), ttest.abs.HW\)p.value, mean(abs.lat.HW, na.rm = TRUE), mean(abs.form.HW, na.rm = TRUE), ttest.abs.SnL\(p.value, mean(abs.lat.SnL, na.rm = TRUE), mean(abs.form.SnL, na.rm = TRUE), ttest.abs.OL\)p.value, mean(abs.lat.OL, na.rm = TRUE), mean(abs.form.OL, na.rm = TRUE), LT_FLA\("Pr(>F)", mean(lat\)FLA, na.rm = TRUE), mean(form$FLA, na.rm = TRUE)), ncol=3, byrow=TRUE)

colnames(average.table)<- c(“p-value for variance”, “lat mean”, “form mean”)

rownames(average.table) <- c(“dorsal rays”, “L pect rays”, “R pelvic rays”, ”L pelvic rays”, “Anal rays”, “R pect rays”, ”lat line scales”, ”scales above LL”, ”scales below LL”, ”scales before dor”, “body dep”, “peduncle dep”, “peduncle leng”, “pre-dorsal”, “dorsal base”, “head length”, “head depth”, “head width”, “snout leng”, “orbital leng”, “fluct. asymmetries”)

average.table


Variance plots

plot_multi_histogram <- function(df, feature, label_column) {
    plt <- ggplot(df, aes(x=eval(parse(text=feature)), fill=eval(parse(text=label_column)))) +
    geom_histogram(bins=10, alpha=0.4, position="identity", aes(y = ..density..), color="black") +
    geom_density(alpha=0.2)  +
    labs(x=feature, y = "Density")
    plt + guides(fill=guide_legend(title=label_column))
}

plot_multi_histogram(raw3, "abs.res.CPD", "SPP")
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

plot_multi_histogram(raw3, "abs.res.P1.R", "SPP")

plot_multi_histogram(raw3, "SALL", "SPP")

PCA analysis

PCA

LOGAN: CHECK THAT EACH VARIABLES IS NEAR NORMALLY DISTRIBUTED. IF NOT, LOG TRANSFORM BEFORE PCA. ALSO CHECK THAT PCA CALCULATES Z SCORES AND PLOTS BASED ON THAT; IF NOT CONVERT TO Z SCORES THEN RUN PCA.

In this analysis, I will compare the principle components after centering and scaling the data. A PCA analysis will help us determine what aspects of morphology influence the variation in our data the most without worrying about differences in scales/measurements. Currently, data consists of 116 Sailfin and 186 Amazon.

Chart

(chart <- matrix(c("Variable chart:", "D: dorsal ray count", "P1: left pectoral ray count", "P2.R: right pelvic rays", "P2.L: left pelvic rays", "A: anal rays", "P1.R: right pectoral ray count", "LLSC: lateral line scale count", "SALL: scales above lateral line", "SBLL: scales below lateral line", "SBDF: scales before dorsal fin", "TL: total length", "SL: standard length", "BD: body depth", "CPD: caudal peduncle depth", "CPL: caudal peduncle length", "PreDL: pre-dorsal length", "DbL: dorsal base length", "HL/HW/HD: head length/width/depth", "SnL: snout length", "OL: ocular length"), ncol=1, byrow=TRUE))
##       [,1]                               
##  [1,] "Variable chart:"                  
##  [2,] "D: dorsal ray count"              
##  [3,] "P1: left pectoral ray count"      
##  [4,] "P2.R: right pelvic rays"          
##  [5,] "P2.L: left pelvic rays"           
##  [6,] "A: anal rays"                     
##  [7,] "P1.R: right pectoral ray count"   
##  [8,] "LLSC: lateral line scale count"   
##  [9,] "SALL: scales above lateral line"  
## [10,] "SBLL: scales below lateral line"  
## [11,] "SBDF: scales before dorsal fin"   
## [12,] "TL: total length"                 
## [13,] "SL: standard length"              
## [14,] "BD: body depth"                   
## [15,] "CPD: caudal peduncle depth"       
## [16,] "CPL: caudal peduncle length"      
## [17,] "PreDL: pre-dorsal length"         
## [18,] "DbL: dorsal base length"          
## [19,] "HL/HW/HD: head length/width/depth"
## [20,] "SnL: snout length"                
## [21,] "OL: ocular length"

Loadings

library(stringr)

raw1a <- subset(raw1, select = -c(17:18) ) #took out P2R and L since they don't vary at all
raw1a <- subset(raw1a, select=-c(34:49))#took out transformed values
raw1a <- raw1a[raw1a$SPP !="p.mexicana", ]

#PCA <- prcomp(raw1a[, 15:32], center=TRUE, scale. = TRUE) #includes all but pelvic traits, since they are the same in all species WILL SCALE/CENTER DATA ON MY OWN, LOGAN SAID IT IS NOT ALWAYS DOING IT FOR YOU EVEN WHEN YOU SPECIFY

z.scores <- scale(raw1a[, 15:33], center = TRUE, scale = TRUE)

raw1a <- cbind(raw1a, z.scores)

colnames(raw1a)[34:52] <- str_c( "z_", colnames(raw1a)[15:33] )


PCA <- prcomp(raw1a[, 34:52])

summary(PCA)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     3.0628 1.6531 1.14425 1.03347 0.98389 0.92628 0.89325
## Proportion of Variance 0.4937 0.1438 0.06891 0.05621 0.05095 0.04516 0.04199
## Cumulative Proportion  0.4937 0.6375 0.70647 0.76268 0.81363 0.85879 0.90078
##                            PC8     PC9    PC10    PC11   PC12    PC13    PC14
## Standard deviation     0.63539 0.56651 0.54153 0.50391 0.4226 0.34766 0.30647
## Proportion of Variance 0.02125 0.01689 0.01543 0.01336 0.0094 0.00636 0.00494
## Cumulative Proportion  0.92203 0.93892 0.95435 0.96772 0.9771 0.98348 0.98842
##                           PC15    PC16    PC17    PC18    PC19
## Standard deviation     0.26119 0.22675 0.22086 0.20498 0.09759
## Proportion of Variance 0.00359 0.00271 0.00257 0.00221 0.00050
## Cumulative Proportion  0.99201 0.99472 0.99729 0.99950 1.00000
(loadings <- PCA$rotation[, 1:5])
##                  PC1          PC2          PC3           PC4         PC5
## z_D     -0.023125918  0.512339587  0.274296565  0.1004003051  0.06565042
## z_P1    -0.021172287 -0.400745232  0.383033685  0.2535355028  0.01970006
## z_A     -0.002691019 -0.067691278  0.577547144  0.1247870940  0.17951684
## z_P1.R  -0.036159356 -0.424337470  0.298509581  0.3147900237  0.03199074
## z_LLSC  -0.080066865  0.274308397  0.251844804  0.0003399013 -0.31938239
## z_SALL  -0.050532616 -0.051296850 -0.486844238  0.6498672092  0.09308622
## z_SBLL  -0.057856335  0.108431193  0.058099357  0.3413704601 -0.82866776
## z_SBDF  -0.012995775 -0.409590786 -0.106164211 -0.3912320042 -0.38140946
## z_SL    -0.321020234 -0.055330649  0.006015599 -0.0470335950  0.01203157
## z_BD    -0.311442077  0.090305116 -0.004141675  0.0106665883  0.02609686
## z_CPD   -0.317091552 -0.007465525 -0.010294404 -0.0008030631  0.04637472
## z_CPL   -0.309216131 -0.069705738  0.021123514 -0.0006747957  0.02689966
## z_PreDL -0.299230116 -0.185161125 -0.042850750 -0.0702738205 -0.02769696
## z_DbL   -0.263082815  0.281326647  0.099848288  0.0405728383  0.07710593
## z_HL    -0.286373286 -0.053376922  0.070063685 -0.2068118584 -0.03298241
## z_HD    -0.311950929  0.025861089 -0.028257174 -0.0978219713  0.01977228
## z_HW    -0.312983506 -0.027005739 -0.102965431  0.0624696740  0.02652160
## z_SnL   -0.269756709 -0.018795144 -0.104316832  0.2067219100  0.08042476
## z_OL    -0.283210589  0.026602769  0.026187203 -0.1056545356 -0.01248445
sorted.loadings.1 <- loadings[order(loadings[, 1]), 1]
myTitle <- "Loadings Plot for PC1" 
myXlab  <- "Variable Loadings"
dotchart(sorted.loadings.1, main=myTitle, xlab=myXlab, cex=1.5, col="red")

sorted.loadings.2 <- loadings[order(loadings[, 2]), 2]
myTitle <- "Loadings Plot for PC2" 
myXlab  <- "Variable Loadings"
dotchart(sorted.loadings.2, main=myTitle, xlab=myXlab, cex=1.5, col="red")

sorted.loadings.3 <- loadings[order(loadings[, 3]), 3]
myTitle <- "Loadings Plot for PC3" 
myXlab  <- "Variable Loadings"
dotchart(sorted.loadings.3, main=myTitle, xlab=myXlab, cex=1.5, col="red")

VM_PCA <- varimax(PCA$rotation[, 1:3]) 

VM_PCA
## $loadings
## 
## Loadings:
##         PC1    PC2    PC3   
## z_D             0.579       
## z_P1           -0.170  0.526
## z_A             0.209  0.542
## z_P1.R         -0.228  0.463
## z_LLSC          0.368       
## z_SALL         -0.263 -0.405
## z_SBLL          0.130       
## z_SBDF         -0.406       
## z_SL    -0.324              
## z_BD    -0.298  0.120       
## z_CPD   -0.316              
## z_CPL   -0.313              
## z_PreDL -0.321 -0.140       
## z_DbL   -0.220  0.329       
## z_HL    -0.285              
## z_HD    -0.308              
## z_HW    -0.321              
## z_SnL   -0.277              
## z_OL    -0.275              
## 
##                  PC1   PC2   PC3
## SS loadings    1.000 1.000 1.000
## Proportion Var 0.053 0.053 0.053
## Cumulative Var 0.053 0.105 0.158
## 
## $rotmat
##            [,1]       [,2]        [,3]
## [1,] 0.99025106 -0.1389062 -0.01038808
## [2,] 0.11785093  0.8752334 -0.46912433
## [3,] 0.07425625  0.4633266  0.88307103
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
(eig.val <- get_eigenvalue(PCA))
##         eigenvalue variance.percent cumulative.variance.percent
## Dim.1  9.380664534      49.37191860                    49.37192
## Dim.2  2.732862474      14.38348671                    63.75541
## Dim.3  1.309310206       6.89110635                    70.64651
## Dim.4  1.068052941       5.62133127                    76.26784
## Dim.5  0.968046113       5.09497954                    81.36282
## Dim.6  0.857998424       4.51578118                    85.87860
## Dim.7  0.797895874       4.19945197                    90.07806
## Dim.8  0.403714450       2.12481290                    92.20287
## Dim.9  0.320936708       1.68914057                    93.89201
## Dim.10 0.293253289       1.54343836                    95.43545
## Dim.11 0.253927815       1.33646218                    96.77191
## Dim.12 0.178592564       0.93996086                    97.71187
## Dim.13 0.120864150       0.63612710                    98.34800
## Dim.14 0.093923089       0.49433204                    98.84233
## Dim.15 0.068220281       0.35905411                    99.20138
## Dim.16 0.051415517       0.27060799                    99.47199
## Dim.17 0.048780881       0.25674148                    99.72873
## Dim.18 0.042016114       0.22113744                    99.94987
## Dim.19 0.009524576       0.05012935                   100.00000
ind <- get_pca_ind(PCA)
head(ind$coord[,1:4])
##         Dim.1     Dim.2      Dim.3       Dim.4
## 1 -3.79218614 1.1636137  0.1800926 -0.08963595
## 2 -3.78917831 0.9596244  1.3893620 -2.02646387
## 3 -7.11228308 1.9828091 -0.7472805 -0.85903835
## 4  0.73532897 1.9695620 -1.5908193  0.72014647
## 5 -2.08118776 2.6499339 -0.3743038 -0.54991733
## 7  0.02419752 2.0659483 -1.8182757  1.04810458
raw1.p <- raw1[raw1$SPP !="p.mexicana", ]
raw1.p <- cbind(raw1.p, ind$coord[,1:4])
Post PCA tests
lat.p <- raw1.p[raw1.p$SPP == "p.latipinna",]


form.p <- raw1.p[raw1.p$SPP == "p.formosa",]

(pc1 <- t.test(lat.p$Dim.1, form.p$Dim.1, alternative = "two.sided"))
## 
##  Welch Two Sample t-test
## 
## data:  lat.p$Dim.1 and form.p$Dim.1
## t = -0.015658, df = 295.09, p-value = 0.9875
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.6980978  0.6870769
## sample estimates:
##    mean of x    mean of y 
## -0.003049100  0.002461322
(pc2 <- t.test(lat.p$Dim.2, form.p$Dim.2, alternative = "two.sided"))
## 
##  Welch Two Sample t-test
## 
## data:  lat.p$Dim.2 and form.p$Dim.2
## t = 31.156, df = 284.18, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  2.720668 3.087623
## sample estimates:
## mean of x mean of y 
##  1.606961 -1.297185
(pc3 <- t.test(lat.p$Dim.3, form.p$Dim.3, alternative = "two.sided"))
## 
##  Welch Two Sample t-test
## 
## data:  lat.p$Dim.3 and form.p$Dim.3
## t = 2.7177, df = 282.06, p-value = 0.00698
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.09875662 0.61762364
## sample estimates:
##  mean of x  mean of y 
##  0.1981985 -0.1599916
(pc1V <- leveneTest(Dim.1~SPP, data=raw1.p))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  1.9292 0.1659
##       298
(pc2V <- leveneTest(Dim.2~SPP, data=raw1.p))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.0258 0.8724
##       298
(pc3V <- leveneTest(Dim.3~SPP, data=raw1.p))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.0056 0.9404
##       298

Plots

PCA 1v2
library(AMR) 
library(ggplot2)
library(ggfortify)

plot1<- autoplot(PCA, data = raw1a, colour='SPP', loadings=FALSE, loadings.label=FALSE, frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal() 
plot1

PCA 2v3
plot5<- autoplot(PCA, x=2, y=3, data = raw1a, colour='SPP', loadings=FALSE, loadings.label=FALSE, frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal()
plot5


Tampico population

(1) Continuous, residuals

  • will do two-tailed and check out the means to infer direction (see beginning for trait means w/o body correction)
  • this will be for the traits that did have a significant regression compared to SL; will be using the absolute value residuals.
  • For continuous traits, we will be using the transformed data.
ANOVAs
tampico <- raw2[raw2$COUNTY.ST == "Tamaulipas/MX",]
lat.mx <- tampico[tampico$SPP == "p.latipinna",]
form.mx <- tampico[tampico$SPP == "p.formosa",]
mex.mx <- tampico[tampico$SPP == "p.mexicana",]



MX.BD <- aov(abs.res.BD~SPP, data=tampico)
summary(MX.BD)
##             Df  Sum Sq  Mean Sq F value Pr(>F)   
## SPP          2 0.01201 0.006007   5.421  0.006 **
## Residuals   89 0.09861 0.001108                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
MX.CPD <- aov(abs.res.CPD~SPP, data=tampico)
summary(MX.CPD)
##             Df  Sum Sq   Mean Sq F value Pr(>F)
## SPP          2 0.00190 0.0009478   2.121  0.126
## Residuals   89 0.03977 0.0004468
MX.CPL <- aov(abs.res.CPL~SPP, data=tampico)
summary(MX.CPL)
##             Df  Sum Sq   Mean Sq F value Pr(>F)  
## SPP          2 0.00424 0.0021221   4.029 0.0211 *
## Residuals   89 0.04688 0.0005267                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
MX.PreDL <- aov(abs.res.PreDL~SPP, data=tampico)
summary(MX.PreDL)
##             Df Sum Sq  Mean Sq F value Pr(>F)  
## SPP          2 0.0248 0.012406   2.978  0.056 .
## Residuals   89 0.3708 0.004166                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
MX.DbL <- aov(abs.res.DbL~SPP, data=tampico)
summary(MX.DbL)
##             Df  Sum Sq  Mean Sq F value Pr(>F)
## SPP          2 0.00329 0.001645   1.404  0.251
## Residuals   89 0.10427 0.001172
MX.HL <- aov(abs.res.HL~SPP, data=tampico)
summary(MX.HL)
##             Df  Sum Sq  Mean Sq F value Pr(>F)    
## SPP          2 0.01706 0.008529   11.73  3e-05 ***
## Residuals   89 0.06470 0.000727                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
MX.HD <- aov(abs.res.HD~SPP, data=tampico)
summary(MX.HD)
##             Df  Sum Sq   Mean Sq F value Pr(>F)  
## SPP          2 0.00325 0.0016271   2.471 0.0903 .
## Residuals   89 0.05861 0.0006585                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
MX.HW <- aov(abs.res.HW~SPP, data=tampico)
summary(MX.HW)
##             Df  Sum Sq   Mean Sq F value Pr(>F)  
## SPP          2 0.00426 0.0021285   3.921 0.0233 *
## Residuals   89 0.04831 0.0005429                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
MX.SnL <- aov(abs.res.SnL~SPP, data=tampico)
summary(MX.SnL)
##             Df  Sum Sq  Mean Sq F value Pr(>F)  
## SPP          2 0.00975 0.004874   3.616 0.0309 *
## Residuals   89 0.11996 0.001348                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
MX.OL <- aov(abs.res.OL~SPP, data=tampico)
summary(MX.OL)
##             Df    Sum Sq   Mean Sq F value Pr(>F)
## SPP          2 1.990e-35 9.948e-36   1.326  0.271
## Residuals   89 6.676e-34 7.501e-36

(2) Discrete, residuals

Logan suggested doing a glm with poisson distribution (because it is count data) for these relationships. I originally performed Mann Whitney U tests, so will also include that. Overall, results don’t differ even though exact values are slightly different.

Had to remove the poisson part, becausethe residuals didn’t meet the non-integer requirement for poisson (it freaked out and took forever to load).

GLMs
MX.glm_D <- glm(abs.res.D~SPP, data=tampico)
print(summary(MX.glm_D), show.residuals=TRUE)
## 
## Call:
## glm(formula = abs.res.D ~ SPP, data = tampico)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.48224  -0.26090  -0.11501   0.08735   1.41666  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.50169    0.06279   7.990 4.55e-12 ***
## SPPp.latipinna -0.09573    0.08598  -1.113 0.268536    
## SPPp.mexicana  -0.33597    0.08598  -3.908 0.000181 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.110386)
## 
##     Null deviance: 11.6553  on 91  degrees of freedom
## Residual deviance:  9.8244  on 89  degrees of freedom
## AIC: 63.288
## 
## Number of Fisher Scoring iterations: 2
MX.D.aov <- aov(MX.glm_D)
summary(MX.D.aov)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## SPP          2  1.831  0.9155   8.293 0.000498 ***
## Residuals   89  9.824  0.1104                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
MX.glm_P1 <- glm(abs.res.P1~SPP, data=tampico)
print(summary(MX.glm_P1), show.residuals=TRUE)
## 
## Call:
## glm(formula = abs.res.P1 ~ SPP, data = tampico)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.5127  -0.3543  -0.1120   0.1266   2.8773  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.52162    0.09641   5.411 5.25e-07 ***
## SPPp.latipinna  0.10544    0.13201   0.799    0.427    
## SPPp.mexicana   0.04702    0.13201   0.356    0.723    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.2602401)
## 
##     Null deviance: 23.329  on 91  degrees of freedom
## Residual deviance: 23.161  on 89  degrees of freedom
## AIC: 142.19
## 
## Number of Fisher Scoring iterations: 2
MX.P1.aov <- aov(MX.glm_P1)
summary(MX.P1.aov)
##             Df Sum Sq Mean Sq F value Pr(>F)
## SPP          2  0.168  0.0839   0.322  0.725
## Residuals   89 23.161  0.2602
MX.glm_P1.R <- glm(abs.res.P1.R~SPP, data=tampico)
print(summary(MX.glm_P1.R), show.residuals=TRUE)
## 
## Call:
## glm(formula = abs.res.P1.R ~ SPP, data = tampico)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.4554  -0.2619  -0.1498   0.1629   0.9018  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.35672    0.06972   5.116 1.78e-06 ***
## SPPp.latipinna  0.22375    0.09547   2.344   0.0213 *  
## SPPp.mexicana   0.10200    0.09547   1.068   0.2882    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1361049)
## 
##     Null deviance: 12.867  on 91  degrees of freedom
## Residual deviance: 12.113  on 89  degrees of freedom
## AIC: 82.556
## 
## Number of Fisher Scoring iterations: 2
MX.P1.R.aov <- aov(MX.glm_P1.R)
summary(MX.P1.R.aov)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## SPP          2  0.754  0.3770    2.77 0.0681 .
## Residuals   89 12.113  0.1361                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
MX.glm_LLSC <- glm(abs.res.LLSC~SPP, data=tampico)
print(summary(MX.glm_LLSC), show.residuals=TRUE)
## 
## Call:
## glm(formula = abs.res.LLSC ~ SPP, data = tampico)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.5884  -0.2474  -0.1354   0.1936   1.0364  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.68514    0.07721   8.873 6.87e-14 ***
## SPPp.latipinna -0.18794    0.10573  -1.778   0.0789 .  
## SPPp.mexicana  -0.24164    0.10573  -2.286   0.0247 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1669345)
## 
##     Null deviance: 15.802  on 91  degrees of freedom
## Residual deviance: 14.857  on 89  degrees of freedom
## AIC: 101.34
## 
## Number of Fisher Scoring iterations: 2
MX.LLSC.aov <- aov(MX.glm_LLSC)
summary(MX.LLSC.aov)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## SPP          2  0.945  0.4724    2.83 0.0643 .
## Residuals   89 14.857  0.1669                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
MX.glm_SBLL <- glm(abs.res.SBLL~SPP, data=tampico)
print(summary(MX.glm_SBLL), show.residuals=TRUE)
## 
## Call:
## glm(formula = abs.res.SBLL ~ SPP, data = tampico)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.27001  -0.17228  -0.09092  -0.01136   0.85609  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)   
## (Intercept)     0.16989    0.05341   3.181  0.00202 **
## SPPp.latipinna  0.11045    0.07313   1.510  0.13452   
## SPPp.mexicana   0.10144    0.07313   1.387  0.16890   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.07987104)
## 
##     Null deviance: 7.3284  on 91  degrees of freedom
## Residual deviance: 7.1085  on 89  degrees of freedom
## AIC: 33.519
## 
## Number of Fisher Scoring iterations: 2
MX.SBLL.aov <- aov(MX.glm_SBLL)
summary(MX.SBLL.aov)
##             Df Sum Sq Mean Sq F value Pr(>F)
## SPP          2  0.220 0.10996   1.377  0.258
## Residuals   89  7.109 0.07987

(3) Discrete, raw

EVEN THOUGH THESE ARE DISCRETE, NO IDEA WHAT ELSE TO DO WITH THEM (glmm on raw data will just compare means). Did both F-test and Levene’s test. Get the same overall results, albeit slightly different values. Might go with Levene’s since it is good with non-normal data.

This will only be for the traits that did not have a significant regression compared to SL (so P2s, A, SALL, SBDF, and FLA); none of these traits were transformed, so just doing raw for now.

Levene’s test

Still only performing this on the traits that did NOT vary with SL (P2L/R, A, SALL, SBLL [between sail and amz only], SBDF, FLA).

library(car)
library(carData)

(LT_P2L <- leveneTest(P2.L~SPP, data=tampico)) #gives nothing since it's all the same value
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2     NaN    NaN
##       89
(LT_P2R <- leveneTest(P2.R~SPP, data=tampico)) #same as above
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2     NaN    NaN
##       89
(LT_A <- leveneTest(A~SPP, data=tampico)) 
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2  0.2442 0.7838
##       89
(LT_SALL <- leveneTest(SALL~SPP, data=tampico))
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2  0.9029 0.4091
##       89
(LT_SBDF <- leveneTest(SBDF~SPP, data=tampico))
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  2   3.525 0.03363 *
##       89                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(LT_FLA <- leveneTest(FLA~SPP, data=tampico))
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  2  4.0766 0.02023 *
##       89                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1







Filtered data

Now I will repeat analysis on a dataset that filters for fish above 32.5mm (this is the average for the range I found in literature, 28-37mm). This would confirm that all fish are likely adults, and remove any bias from juvenile proportions (ex. if juvenile head is proportionately larger than expected for body).

Initial steps

Data collection

library(dplyr)

rawF <- filter(raw1,SL>=32.5)

rawF1 <- rawF[-c(150), ] #this takes out the monster fish too

Checking normality

Checking normality

I will use Shapiro-wilke, histograms, and QQ plots to determine what traits are normal. These will only be performed on continuous variables, as discrete variables are not normal by nature.

Conclusions: literally all of them are NOT normal… will log transform them and run parametric tests (t-test, F-test, ANOVA). Not sure what to do with the discrete variables…[Logan said to use non-parametric OR a glmer with poisson distribution]

SW Test
shapiro.test(rawF$SL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$SL
## W = 0.88785, p-value = 9.323e-12
shapiro.test(rawF$BD)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$BD
## W = 0.94379, p-value = 1.537e-07
shapiro.test(rawF$CPD)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$CPD
## W = 0.9366, p-value = 3.381e-08
shapiro.test(rawF$CPL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$CPL
## W = 0.91011, p-value = 2.741e-10
shapiro.test(rawF$PreDL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$PreDL
## W = 0.94232, p-value = 1.118e-07
shapiro.test(rawF$DbL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$DbL
## W = 0.98676, p-value = 0.03793
shapiro.test(rawF$HL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$HL
## W = 0.88931, p-value = 1.147e-11
shapiro.test(rawF$HD)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$HD
## W = 0.91737, p-value = 9.289e-10
shapiro.test(rawF$HW)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$HW
## W = 0.92699, p-value = 5.211e-09
shapiro.test(rawF$SnL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$SnL
## W = 0.97288, p-value = 0.0002976
shapiro.test(rawF$OL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$OL
## W = 0.9821, p-value = 0.006707
SW tests for df w/o monster
shapiro.test(rawF1$SL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$SL
## W = 0.90902, p-value = 2.458e-10
shapiro.test(rawF1$BD)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$BD
## W = 0.95312, p-value = 1.374e-06
shapiro.test(rawF1$CPD)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$CPD
## W = 0.95682, p-value = 3.411e-06
shapiro.test(rawF1$CPL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$CPL
## W = 0.92307, p-value = 2.703e-09
shapiro.test(rawF1$PreDL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$PreDL
## W = 0.95065, p-value = 7.638e-07
shapiro.test(rawF1$DbL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$DbL
## W = 0.98778, p-value = 0.05693
shapiro.test(rawF1$HL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$HL
## W = 0.9142, p-value = 5.777e-10
shapiro.test(rawF1$HD)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$HD
## W = 0.95302, p-value = 1.34e-06
shapiro.test(rawF1$HW)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$HW
## W = 0.95207, p-value = 1.068e-06
shapiro.test(rawF1$SnL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$SnL
## W = 0.97714, p-value = 0.001228
shapiro.test(rawF1$OL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$OL
## W = 0.98583, p-value = 0.02724
Histograms
hist(rawF$SL, breaks=30)

hist(rawF$BD, breaks=30)

hist(rawF$CPD, breaks=30)

hist(rawF$CPL, breaks=30)

hist(rawF$PreDL, breaks=30)

hist(rawF$DbL, breaks=30)

hist(rawF$HL, breaks=30)

hist(rawF$HD, breaks=30)

hist(rawF$HW, breaks=30)

hist(rawF$SnL, breaks=30)

hist(rawF$OL, breaks=30)

Hists for df w/o monster
hist(rawF1$SL, breaks=30)

hist(rawF1$BD, breaks=30)

hist(rawF1$CPD, breaks=30)

hist(rawF1$CPL, breaks=30)

hist(rawF1$PreDL, breaks=30)

hist(rawF1$DbL, breaks=30)

hist(rawF1$HL, breaks=30)

hist(rawF1$HD, breaks=30)

hist(rawF1$HW, breaks=30)

hist(rawF1$SnL, breaks=30)

hist(rawF1$OL, breaks=30)

QQ plots
qqnorm(rawF$SL)
qqline(rawF$SL)

qqnorm(rawF$BD)
qqline(rawF$BD)

qqnorm(rawF$CPD)
qqline(rawF$CPD)

qqnorm(rawF$CPL)
qqline(rawF$CPL)

qqnorm(rawF$PreDL)
qqline(rawF$PreDL)

qqnorm(rawF$DbL)
qqline(rawF$DbL)

qqnorm(rawF$HL)
qqline(rawF$HL)

qqnorm(rawF$HD)
qqline(rawF$HD)

qqnorm(rawF$HW)
qqline(rawF$HW)

qqnorm(rawF$SnL)
qqline(rawF$SnL)

qqnorm(rawF$OL)
qqline(rawF$OL)

Log transformations

Since all of the continuous characters were not normal, I will log transform them, and proceed with the transformed data in all analyses.

Log transformations did not work on ANY of them. Will try square/cubed root. Square root didn’t work…neither did cubed or sqaured. Log seemed to get them the closest. Will continue with log….

#rawF[paste0(names(rawF), '_sq')] <- '^'(rawF[, 25:35], 2)
#raw1 (which rawF is filtered from) already has log of shit, so don't need to do again

#for some reason did the log of the whole data frame instead of those specific columns, so I will just remove the unnecessary columns instead of spending time on what went wrong.

#rawF <- subset(rawF, select = -c(52:65) )
#rawF <- subset(rawF, select = -c(74:88) )
#double checking normality with a SW test and QQ plot

shapiro.test(rawF$SL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$SL_log
## W = 0.97841, p-value = 0.001827
shapiro.test(rawF$BD_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$BD_log
## W = 0.94873, p-value = 4.646e-07
shapiro.test(rawF$CPD_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$CPD_log
## W = 0.97229, p-value = 0.0002479
shapiro.test(rawF$CPL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$CPL_log
## W = 0.97621, p-value = 0.0008721
shapiro.test(rawF$PreDL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$PreDL_log
## W = 0.99056, p-value = 0.1603
shapiro.test(rawF$DbL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$DbL_log
## W = 0.98022, p-value = 0.003423
shapiro.test(rawF$HL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$HL_log
## W = 0.98727, p-value = 0.04594
shapiro.test(rawF$HD_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$HD_log
## W = 0.98318, p-value = 0.009921
shapiro.test(rawF$HW_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$HW_log
## W = 0.95211, p-value = 1.024e-06
shapiro.test(rawF$SnL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$SnL_log
## W = 0.97428, p-value = 0.000465
shapiro.test(rawF$OL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$OL_log
## W = 0.97841, p-value = 0.001827
hist(rawF$SL_log, breaks=30)

hist(rawF$BD_log, breaks=30)

hist(rawF$CPD_log, breaks=30)

hist(rawF$CPL_log, breaks=30)

hist(rawF$PreDL_log, breaks=30)

hist(rawF$DbL_log, breaks=30)

hist(rawF$HL_log, breaks=30)

hist(rawF$HD_log, breaks=30)

hist(rawF$HW_log, breaks=30)

hist(rawF$SnL_log, breaks=30)

hist(rawF$OL_log, breaks=30)

qqnorm(rawF$SL_log)
qqline(rawF$SL_log)

qqnorm(rawF$BD_log)
qqline(rawF$BD_log)

qqnorm(rawF$CPD_log)
qqline(rawF$CPD_log)

qqnorm(rawF$CPL_log)
qqline(rawF$CPL_log)

qqnorm(rawF$PreDL_log)
qqline(rawF$PreDL_log)

qqnorm(rawF$DbL_log)
qqline(rawF$DbL_log)

qqnorm(rawF$HL_log)
qqline(rawF$HL_log)

qqnorm(rawF$HD_log)
qqline(rawF$HD_log)

qqnorm(rawF$HW_log)
qqline(rawF$HW_log)

qqnorm(rawF$SnL_log)
qqline(rawF$SnL_log)

qqnorm(rawF$OL_log)
qqline(rawF$OL_log)

W/o monster
#rawF1[paste0(names(rawF1), '_log')] <- log(rawF1[, 25:35], 10)

#for some reason did the log of the whole data frame instead of those specific columns, so I will just remove the unnecessary columns instead of spending time on what went wrong.

#rawF1 <- subset(rawF1, select = -c(37:50) )
#rawF <- subset(rawF, select = -c(59:94) )
#double checking normality with a SW test and QQ plot

shapiro.test(rawF1$SL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$SL_log
## W = 0.97705, p-value = 0.001191
shapiro.test(rawF1$BD_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$BD_log
## W = 0.95933, p-value = 6.496e-06
shapiro.test(rawF1$CPD_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$CPD_log
## W = 0.98607, p-value = 0.02987
shapiro.test(rawF1$CPL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$CPL_log
## W = 0.98412, p-value = 0.01442
shapiro.test(rawF1$PreDL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$PreDL_log
## W = 0.9913, p-value = 0.2133
shapiro.test(rawF1$DbL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$DbL_log
## W = 0.97967, p-value = 0.002917
shapiro.test(rawF1$HL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$HL_log
## W = 0.99054, p-value = 0.1616
shapiro.test(rawF1$HD_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$HD_log
## W = 0.98823, p-value = 0.06757
shapiro.test(rawF1$HW_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$HW_log
## W = 0.95564, p-value = 2.541e-06
shapiro.test(rawF1$SnL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$SnL_log
## W = 0.97456, p-value = 0.0005271
shapiro.test(rawF1$OL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$OL_log
## W = 0.97705, p-value = 0.001191
qqnorm(rawF1$SL_log)
qqline(rawF1$SL_log)

qqnorm(rawF1$BD_log)
qqline(rawF1$BD_log)

qqnorm(rawF1$CPD_log)
qqline(rawF1$CPD_log)

qqnorm(rawF1$CPL_log)
qqline(rawF1$CPL_log)

qqnorm(rawF1$PreDL_log)
qqline(rawF1$PreDL_log)

qqnorm(rawF1$DbL_log)
qqline(rawF1$DbL_log)

qqnorm(rawF1$HL_log)
qqline(rawF1$HL_log)

qqnorm(rawF1$HD_log)
qqline(rawF1$HD_log)

qqnorm(rawF1$HW_log)
qqline(rawF1$HW_log)

qqnorm(rawF1$SnL_log)
qqline(rawF1$SnL_log)

qqnorm(rawF1$OL_log)
qqline(rawF1$OL_log)

Correcting for body size (residuals)

SL correction

Since amazons are in general bigger than sailfin, we don’t want any results to be due to this difference in body size bias. Therefore, we will see what traits are influenced by body size (regressions) and correct for body size when necessary (absolute value of residuals). We can then use the residuals when comparing between species for traits that are influenced by body size, and raw/transformed data for traits that are not influenced by body size. I will also calculate standardized residuals to compare residuals across traits in later analyses.

Quick results summary: traits not influenced by body size are left & right pelvic, anal, scales above and below lateral line (except in mexicana), scales before dorsal fin and fluctuating asymmetry; all other traits influenced by body size.

Regressions

  • STEP ONE: plot trait vs body size
library(ggplot2)
library(ggpubr)



lat.F <- rawF[rawF$SPP == "p.latipinna",]


form.F <- rawF[rawF$SPP == "p.formosa",]


mex.F <- rawF[rawF$SPP == "p.mexicana",]



##### LAT #####
F.reg.lat.D <- lm(lat.F$D ~ lat.F$SL)
F.sd.lat.D <- rstandard(F.reg.lat.D)
F.reg.lat.D.plot <- ggplot(lat.F, aes(x = SL, y = D)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.D.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.P1 <- lm(lat.F$P1 ~ lat.F$SL)
F.sd.lat.P1 <- rstandard(F.reg.lat.P1)
F.reg.lat.P1.plot <- ggplot(lat.F, aes(x = SL, y = P1)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.P1.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.P2.L <- lm(lat.F$P2.L ~ lat.F$SL)
F.sd.lat.P2.L <- rstandard(F.reg.lat.P2.L)
F.reg.lat.P2.L.plot <- ggplot(lat.F, aes(x = SL, y = P2.L)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.P2.L.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.P2.R <- lm(lat.F$P2.R ~ lat.F$SL)
F.sd.lat.P2.R <- rstandard(F.reg.lat.P2.R)
F.reg.lat.P2.R.plot <- ggplot(lat.F, aes(x = SL, y = P2.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.P2.R.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.A <- lm(lat.F$A ~ lat.F$SL)
F.sd.lat.A <- rstandard(F.reg.lat.A)
F.reg.lat.A.plot <- ggplot(lat.F, aes(x = SL, y = A)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.A.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.P1.R <- lm(lat.F$P1.R ~ lat.F$SL)
F.sd.lat.P1.R <- rstandard(F.reg.lat.P1.R)
F.reg.lat.P1.R.plot <- ggplot(lat.F, aes(x = SL, y = P1.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.P1.R.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.LLSC <- lm(lat.F$LLSC ~ lat.F$SL)
F.sd.lat.LLSC <- rstandard(F.reg.lat.LLSC)
F.reg.lat.LLSC.plot <- ggplot(lat.F, aes(x = SL, y = LLSC)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.LLSC.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.SALL <- lm(lat.F$SALL ~ lat.F$SL)
F.sd.lat.SALL <- rstandard(F.reg.lat.SALL)
F.reg.lat.SALL.plot <- ggplot(lat.F, aes(x = SL, y = SALL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.SALL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.SBLL <- lm(lat.F$SBLL ~ lat.F$SL)
F.sd.lat.SBLL <- rstandard(F.reg.lat.SBLL)
F.reg.lat.SBLL.plot <- ggplot(lat.F, aes(x = SL, y = SBLL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.SBLL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.SBDF <- lm(lat.F$SBDF ~ lat.F$SL)
F.sd.lat.SBDF <- rstandard(F.reg.lat.SBDF)
F.reg.lat.SBDF.plot <- ggplot(lat.F, aes(x = SL, y = SBDF)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.SBDF.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.BD <- lm(lat.F$BD_log ~ lat.F$SL_log)
F.sd.lat.BD <- rstandard(F.reg.lat.BD)
F.reg.lat.BD.plot <- ggplot(lat.F, aes(x = SL_log, y = BD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.BD.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.CPD <- lm(lat.F$CPD_log ~ lat.F$SL_log)
F.sd.lat.CPD <- rstandard(F.reg.lat.CPD)
F.reg.lat.CPD.plot <- ggplot(lat.F, aes(x = SL_log, y = CPD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.CPD.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.CPL <- lm(lat.F$CPL_log ~ lat.F$SL_log)
F.sd.lat.CPL <- rstandard(F.reg.lat.CPL)
F.reg.lat.CPL.plot <- ggplot(lat.F, aes(x = SL_log, y = CPL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.CPL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.PreDL <- lm(lat.F$PreDL_log ~ lat.F$SL_log)
F.sd.lat.PreDL <- rstandard(F.reg.lat.PreDL)
F.reg.lat.PreDL.plot <- ggplot(lat.F, aes(x = SL_log, y = PreDL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.PreDL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.DbL <- lm(lat.F$DbL_log ~ lat.F$SL_log)
F.sd.lat.DbL <- rstandard(F.reg.lat.DbL)
F.reg.lat.DbL.plot <- ggplot(lat.F, aes(x = SL_log, y = DbL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.DbL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.HL <- lm(lat.F$HL_log ~ lat.F$SL_log)
F.sd.lat.HL <- rstandard(F.reg.lat.HL)
F.reg.lat.HL.plot <- ggplot(lat.F, aes(x = SL_log, y = HL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.HL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.HD <- lm(lat.F$HD_log ~ lat.F$SL_log)
F.sd.lat.HD <- rstandard(F.reg.lat.HD)
F.reg.lat.HD.plot <- ggplot(lat.F, aes(x = SL_log, y = HD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.HD.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.HW <- lm(lat.F$HW_log ~ lat.F$SL_log)
F.sd.lat.HW <- rstandard(F.reg.lat.HW)
F.reg.lat.HW.plot <- ggplot(lat.F, aes(x = SL_log, y = HW_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.HW.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.SnL <- lm(lat.F$SnL_log ~ lat.F$SL_log)
F.sd.lat.SnL <- rstandard(F.reg.lat.SnL)
F.reg.lat.SnL.plot <- ggplot(lat.F, aes(x = SL_log, y = SnL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.SnL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.OL <- lm(lat.F$OL_log ~ lat.F$SL_log)
F.sd.lat.OL <- rstandard(F.reg.lat.OL)
F.reg.lat.OL.plot <- ggplot(lat.F, aes(x = SL_log, y = OL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.OL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.FLA <- lm(lat.F$FLA ~ lat.F$SL)
F.sd.lat.FLA <- rstandard(F.reg.lat.FLA)
F.reg.lat.FLA.plot <- ggplot(lat.F, aes(x = SL, y = FLA)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.FLA.plot
## `geom_smooth()` using formula = 'y ~ x'

##### FORM #####

F.reg.form.D <- lm(form.F$D ~ form.F$SL)
F.sd.form.D <- rstandard(F.reg.form.D)
F.reg.form.D.plot <- ggplot(form.F, aes(x =SL, y = D)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.D.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.P1 <- lm(form.F$P1 ~ form.F$SL)
F.sd.form.P1 <- rstandard(F.reg.form.P1)
F.reg.form.P1.plot <- ggplot(form.F, aes(x = SL, y = P1)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.P1.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.P2.L <- lm(form.F$P2.L ~ form.F$SL)
F.sd.form.P2.L <- rstandard(F.reg.form.P2.L)
F.reg.form.P2.L.plot <- ggplot(form.F, aes(x = SL, y = P2.L)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.P2.L.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.P2.R <- lm(form.F$P2.R ~ form.F$SL)
F.sd.form.P2.R <- rstandard(F.reg.form.P2.R)
F.reg.form.P2.R.plot <- ggplot(form.F, aes(x = SL, y = P2.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.P2.R.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.A <- lm(form.F$A ~ form.F$SL)
F.sd.form.A <- rstandard(F.reg.form.A)
F.reg.form.A.plot <- ggplot(form.F, aes(x = SL, y = A)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.A.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.P1.R <- lm(form.F$P1.R ~ form.F$SL)
F.sd.form.P1.R <- rstandard(F.reg.form.P1.R)
F.reg.form.P1.R.plot <- ggplot(form.F, aes(x = SL, y = P1.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.P1.R.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.LLSC <- lm(form.F$LLSC ~ form.F$SL)
F.sd.form.LLSC <- rstandard(F.reg.form.LLSC)
F.reg.form.LLSC.plot <- ggplot(form.F, aes(x = SL, y = LLSC)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.LLSC.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.SALL <- lm(form.F$SALL ~ form.F$SL)
F.sd.form.SALL <- rstandard(F.reg.form.SALL)
F.reg.form.SALL.plot <- ggplot(form.F, aes(x = SL, y = SALL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.SALL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.SBLL <- lm(form.F$SBLL ~ form.F$SL)
F.sd.form.SBLL <- rstandard(F.reg.form.SBLL)
F.reg.form.SBLL.plot <- ggplot(form.F, aes(x = SL, y = SBLL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.SBLL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.SBDF <- lm(form.F$SBDF ~ form.F$SL)
F.sd.form.SBDF <- rstandard(F.reg.form.SBDF)
F.reg.form.SBDF.plot <- ggplot(form.F, aes(x = SL, y = SBDF)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.SBDF.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.BD <- lm(form.F$BD_log ~ form.F$SL_log)
F.sd.form.BD <- rstandard(F.reg.form.BD)
F.reg.form.BD.plot <- ggplot(form.F, aes(x = SL_log, y = BD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.BD.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.CPD <- lm(form.F$CPD_log ~ form.F$SL_log)
F.sd.form.CPD <- rstandard(F.reg.form.CPD)
F.reg.form.CPD.plot <- ggplot(form.F, aes(x = SL_log, y = CPD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.CPD.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.CPL <- lm(form.F$CPL_log ~ form.F$SL_log)
F.sd.form.CPL <- rstandard(F.reg.form.CPL)
F.reg.form.CPL.plot <- ggplot(form.F, aes(x = SL_log, y = CPL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.CPL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.PreDL <- lm(form.F$PreDL_log ~ form.F$SL_log)
F.sd.form.PreDL <- rstandard(F.reg.form.PreDL)
F.reg.form.PreDL.plot <- ggplot(form.F, aes(x = SL_log, y = PreDL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.PreDL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.DbL <- lm(form.F$DbL_log ~ form.F$SL_log)
F.sd.form.DbL <- rstandard(F.reg.form.DbL)
F.reg.form.DbL.plot <- ggplot(form.F, aes(x = SL_log, y = DbL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.DbL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.HL <- lm(form.F$HL_log ~ form.F$SL_log)
F.sd.form.HL <- rstandard(F.reg.form.HL)
F.reg.form.HL.plot <- ggplot(form.F, aes(x = SL_log, y = HL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.HL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.HD <- lm(form.F$HD_log ~ form.F$SL_log)
F.sd.form.HD <- rstandard(F.reg.form.HD)
F.reg.form.HD.plot <- ggplot(form.F, aes(x = SL_log, y = HD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.HD.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.HW <- lm(form.F$HW_log ~ form.F$SL_log)
F.sd.form.HW <- rstandard(F.reg.form.HW)
F.reg.form.HW.plot <- ggplot(form.F, aes(x = SL_log, y = HW_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.HW.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.SnL <- lm(form.F$SnL_log ~ form.F$SL_log)
F.sd.form.SnL <- rstandard(F.reg.form.SnL)
F.reg.form.SnL.plot <- ggplot(form.F, aes(x = SL_log, y = SnL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.SnL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.OL <- lm(form.F$OL_log ~ form.F$SL_log)
F.sd.form.OL <- rstandard(F.reg.form.OL)
F.reg.form.OL.plot <- ggplot(form.F, aes(x = SL_log, y = OL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.OL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.FLA <- lm(form.F$FLA ~ form.F$SL)
F.sd.form.FLA <- rstandard(F.reg.form.FLA)
F.reg.form.FLA.plot <- ggplot(form.F, aes(x = SL, y = FLA)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.FLA.plot
## `geom_smooth()` using formula = 'y ~ x'

##### MEX #####
F.reg.mex.D <- lm(mex.F$D ~ mex.F$SL)
F.sd.mex.D <- rstandard(F.reg.mex.D)
F.reg.mex.D.plot <- ggplot(mex.F, aes(x = SL, y = D)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.D.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.P1 <- lm(mex.F$P1 ~ mex.F$SL)
F.sd.mex.P1 <- rstandard(F.reg.mex.P1)
F.reg.mex.P1.plot <- ggplot(mex.F, aes(x = SL, y = P1)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.P1.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.P2.L <- lm(mex.F$P2.L ~ mex.F$SL)
F.sd.mex.P2.L <- rstandard(F.reg.mex.P2.L)
F.reg.mex.P2.L.plot <- ggplot(mex.F, aes(x = SL, y = P2.L)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.P2.L.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.P2.R <- lm(mex.F$P2.R ~ mex.F$SL)
F.sd.mex.P2.R <- rstandard(F.reg.mex.P2.R)
F.reg.mex.P2.R.plot <- ggplot(mex.F, aes(x = SL, y = P2.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.P2.R.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.A <- lm(mex.F$A ~ mex.F$SL)
F.sd.mex.A <- rstandard(F.reg.mex.A)
F.reg.mex.A.plot <- ggplot(mex.F, aes(x = SL, y = A)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.A.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.P1.R <- lm(mex.F$P1.R ~ mex.F$SL)
F.sd.mex.P1.R <- rstandard(F.reg.mex.P1.R)
F.reg.mex.P1.R.plot <- ggplot(mex.F, aes(x = SL, y = P1.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.P1.R.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.LLSC <- lm(mex.F$LLSC ~ mex.F$SL)
F.sd.mex.LLSC <- rstandard(F.reg.mex.LLSC)
F.reg.mex.LLSC.plot <- ggplot(mex.F, aes(x = SL, y = LLSC)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.LLSC.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.SALL <- lm(mex.F$SALL ~ mex.F$SL)
F.sd.mex.SALL <- rstandard(F.reg.mex.SALL)
F.reg.mex.SALL.plot <- ggplot(mex.F, aes(x = SL, y = SALL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.SALL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.SBLL <- lm(mex.F$SBLL ~ mex.F$SL)
F.sd.mex.SBLL <- rstandard(F.reg.mex.SBLL)
F.reg.mex.SBLL.plot <- ggplot(mex.F, aes(x = SL, y = SBLL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.SBLL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.SBDF <- lm(mex.F$SBDF ~ mex.F$SL)
F.sd.mex.SBDF <- rstandard(F.reg.mex.SBDF)
F.reg.mex.SBDF.plot <- ggplot(mex.F, aes(x = SL, y = SBDF)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.SBDF.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.BD <- lm(mex.F$BD_log ~ mex.F$SL_log)
F.sd.mex.BD <- rstandard(F.reg.mex.BD)
F.reg.mex.BD.plot <- ggplot(mex.F, aes(x = SL_log, y = BD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.BD.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.CPD <- lm(mex.F$CPD_log ~ mex.F$SL_log)
F.sd.mex.CPD <- rstandard(F.reg.mex.CPD)
F.reg.mex.CPD.plot <- ggplot(mex.F, aes(x = SL_log, y = CPD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.CPD.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.CPL <- lm(mex.F$CPL_log ~ mex.F$SL_log)
F.sd.mex.CPL <- rstandard(F.reg.mex.CPL)
F.reg.mex.CPL.plot <- ggplot(mex.F, aes(x = SL_log, y = CPL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.CPL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.PreDL <- lm(mex.F$PreDL_log ~ mex.F$SL_log)
F.sd.mex.PreDL <- rstandard(F.reg.mex.PreDL)
F.reg.mex.PreDL.plot <- ggplot(mex.F, aes(x = SL_log, y = PreDL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.PreDL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.DbL <- lm(mex.F$DbL_log ~ mex.F$SL_log)
F.sd.mex.DbL <- rstandard(F.reg.mex.DbL)
F.reg.mex.DbL.plot <- ggplot(mex.F, aes(x = SL_log, y = DbL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.DbL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.HL <- lm(mex.F$HL_log ~ mex.F$SL_log)
F.sd.mex.HL <- rstandard(F.reg.mex.HL)
F.reg.mex.HL.plot <- ggplot(mex.F, aes(x = SL_log, y = HL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.HL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.HD <- lm(mex.F$HD_log ~ mex.F$SL_log)
F.sd.mex.HD <- rstandard(F.reg.mex.HD)
F.reg.mex.HD.plot <- ggplot(mex.F, aes(x = SL_log, y = HD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.HD.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.HW <- lm(mex.F$HW_log ~ mex.F$SL_log)
F.sd.mex.HW <- rstandard(F.reg.mex.HW)
F.reg.mex.HW.plot <- ggplot(mex.F, aes(x = SL_log, y = HW_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.HW.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.SnL <- lm(mex.F$SnL_log ~ mex.F$SL_log)
F.sd.mex.SnL <- rstandard(F.reg.mex.SnL)
F.reg.mex.SnL.plot <- ggplot(mex.F, aes(x = SL_log, y = SnL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.SnL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.OL <- lm(mex.F$OL_log ~ mex.F$SL_log)
F.sd.mex.OL <- rstandard(F.reg.mex.OL)
F.reg.mex.OL.plot <- ggplot(mex.F, aes(x = SL_log, y = OL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.OL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.FLA <- lm(mex.F$FLA ~ mex.F$SL)
F.sd.mex.FLA <- rstandard(F.reg.mex.FLA)
F.reg.mex.FLA.plot <- ggplot(mex.F, aes(x = SL, y = FLA)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.FLA.plot
## `geom_smooth()` using formula = 'y ~ x'

Residuals

  • STEP TWO: get residuals for each individual for traits that were influenced by body size

  • STEP THREE: convert residuals to absolute value

##### LAT #####

F.abs.lat.D <- abs(F.res.lat.D)
mean(F.abs.lat.D)
## [1] 0.5690529
F.abs.lat.P1.R <- abs(F.res.lat.P1.R)
mean(F.abs.lat.P1.R)
## [1] 0.5782063
F.abs.lat.LLSC <- abs(F.res.lat.LLSC)
mean(F.abs.lat.LLSC)
## [1] 0.7316606
F.abs.lat.SBLL <- abs(F.res.lat.SBLL)
mean(F.abs.lat.SBLL)
## [1] 0.274228
F.abs.lat.BD <- abs(F.res.lat.BD)
mean(F.abs.lat.BD)
## [1] 0.03134439
F.abs.lat.CPD <- abs(F.res.lat.CPD)
mean(F.abs.lat.CPD)
## [1] 0.03008961
F.abs.lat.CPL <- abs(F.res.lat.CPL)
mean(F.abs.lat.CPL)
## [1] 0.031172
F.abs.lat.PreDL <- abs(F.res.lat.PreDL)
mean(F.abs.lat.PreDL)
## [1] 0.04262162
F.abs.lat.DbL <- abs(F.res.lat.DbL)
mean(F.abs.lat.DbL)
## [1] 0.04095673
F.abs.lat.HL <- abs(F.res.lat.HL)
mean(F.abs.lat.HL)
## [1] 0.02983505
F.abs.lat.HD <- abs(F.res.lat.HD)
mean(F.abs.lat.HD)
## [1] 0.02970775
F.abs.lat.HW <- abs(F.res.lat.HW)
mean(F.abs.lat.HW)
## [1] 0.02753679
F.abs.lat.SnL <- abs(F.res.lat.SnL)
mean(F.abs.lat.SnL)
## [1] 0.02404323
F.abs.lat.OL <- abs(F.res.lat.OL)
mean(F.abs.lat.OL)
## [1] 4.47166e-18
##### FORM #####

F.abs.form.D <- abs(F.res.form.D)
mean(F.abs.form.D)
## [1] 0.5057929
F.abs.form.P1.R <- abs(F.res.form.P1.R)
mean(F.abs.form.P1.R)
## [1] 0.4644069
F.abs.form.LLSC <- abs(F.res.form.LLSC)
mean(F.abs.form.LLSC)
## [1] 0.8024266
F.abs.form.SBLL <- abs(F.res.form.SBLL)
mean(F.abs.form.SBLL)
## [1] 0.3032569
F.abs.form.BD <- abs(F.res.form.BD)
mean(F.abs.form.BD)
## [1] 0.03633792
F.abs.form.CPD <- abs(F.res.form.CPD)
mean(F.abs.form.CPD)
## [1] 0.02681247
F.abs.form.CPL <- abs(F.res.form.CPL)
mean(F.abs.form.CPL)
## [1] 0.03066026
F.abs.form.PreDL <- abs(F.res.form.PreDL)
mean(F.abs.form.PreDL)
## [1] 0.04651937
F.abs.form.DbL <- abs(F.res.form.DbL)
mean(F.abs.form.DbL)
## [1] 0.03191869
F.abs.form.HL <- abs(F.res.form.HL)
mean(F.abs.form.HL)
## [1] 0.03006365
F.abs.form.HD <- abs(F.res.form.HD)
mean(F.abs.form.HD)
## [1] 0.02822344
F.abs.form.HW <- abs(F.res.form.HW)
mean(F.abs.form.HW)
## [1] 0.02989707
F.abs.form.SnL <- abs(F.res.form.SnL)
mean(F.abs.form.SnL)
## [1] 0.02797642
F.abs.form.OL <- abs(F.res.form.OL)
mean(F.abs.form.OL)
## [1] 2.211292e-17
##### MEX #####

F.abs.mex.D <- abs(F.res.mex.D)
mean(F.abs.mex.D)
## [1] 0.09391999
F.abs.mex.P1.R <- abs(F.res.mex.P1.R)
mean(F.abs.mex.P1.R)
## [1] 0.5711081
F.abs.mex.LLSC <- abs(F.res.mex.LLSC)
mean(F.abs.mex.LLSC)
## [1] 0.4403132
F.abs.mex.SBLL <- abs(F.res.mex.SBLL)
mean(F.abs.mex.SBLL)
## [1] 0.2860922
F.abs.mex.BD <- abs(F.res.mex.BD)
mean(F.abs.mex.BD)
## [1] 0.03040541
F.abs.mex.CPD <- abs(F.res.mex.CPD)
mean(F.abs.mex.CPD)
## [1] 0.02942755
F.abs.mex.CPL <- abs(F.res.mex.CPL)
mean(F.abs.mex.CPL)
## [1] 0.0309752
F.abs.mex.PreDL <- abs(F.res.mex.PreDL)
mean(F.abs.mex.PreDL)
## [1] 0.04357568
F.abs.mex.DbL <- abs(F.res.mex.DbL)
mean(F.abs.mex.DbL)
## [1] 0.02689813
F.abs.mex.HL <- abs(F.res.mex.HL)
mean(F.abs.mex.HL)
## [1] 0.04816588
F.abs.mex.HD <- abs(F.res.mex.HD)
mean(F.abs.mex.HD)
## [1] 0.0318384
F.abs.mex.HW <- abs(F.res.mex.HW)
mean(F.abs.mex.HW)
## [1] 0.02994677
F.abs.mex.SnL <- abs(F.res.mex.SnL)
mean(F.abs.mex.SnL)
## [1] 0.03665857
F.abs.mex.OL <- abs(F.res.mex.OL)
mean(F.abs.mex.OL)
## [1] 3.901712e-18
#let's get this into the raw1 data set so that we can plot this more easily

F.abs.res.D <- c(F.abs.lat.D, F.abs.form.D, F.abs.mex.D)
F.abs.res.P1.R <- c(F.abs.lat.P1.R, F.abs.form.P1.R, F.abs.mex.P1.R)
F.abs.res.LLSC<- c(F.abs.lat.LLSC, F.abs.form.LLSC, F.abs.mex.LLSC)
F.abs.res.SBLL<- c(F.abs.lat.SBLL, F.abs.form.SBLL, F.abs.mex.SBLL)
F.abs.res.BD<- c(F.abs.lat.BD, F.abs.form.BD, F.abs.mex.BD)
F.abs.res.CPD<- c(F.abs.lat.CPD, F.abs.form.CPD, F.abs.mex.CPD)
F.abs.res.CPL<- c(F.abs.lat.CPL, F.abs.form.CPL, F.abs.mex.CPL)
F.abs.res.PreDL <- c(F.abs.lat.PreDL, F.abs.form.PreDL, F.abs.mex.PreDL)
F.abs.res.DbL <- c(F.abs.lat.DbL, F.abs.form.DbL, F.abs.mex.DbL)
F.abs.res.HL<- c(F.abs.lat.HL, F.abs.form.HL, F.abs.mex.HL)
F.abs.res.HD<- c(F.abs.lat.HD, F.abs.form.HD, F.abs.mex.HD)
F.abs.res.HW <- c(F.abs.lat.HW, F.abs.form.HW, F.abs.mex.HW)
F.abs.res.SnL <- c(F.abs.lat.SnL, F.abs.form.SnL, F.abs.mex.SnL)
F.abs.res.OL <- c(F.abs.lat.OL, F.abs.form.OL, F.abs.mex.OL)


rawF2 <- cbind(rawF, F.abs.res.D, F.abs.res.P1.R, F.abs.res.LLSC, F.abs.res.SBLL, F.abs.res.BD, F.abs.res.CPD, F.abs.res.CPL, F.abs.res.PreDL, F.abs.res.DbL, F.abs.res.HL, F.abs.res.HD, F.abs.res.HW, F.abs.res.SnL, F.abs.res.OL)

Residual Comparisons

  • STEP FOUR: plot the mean of the |residuals| for both species, for all traits influenced by body size
library(ggbeeswarm)
library(dplyr)


ggplot(rawF2, aes(SPP, F.abs.res.D)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

scatter_violin <- ggplot(data=rawF2, aes(x=SPP, y=F.abs.res.D)) +
  geom_violin(trim = FALSE) + 
  stat_summary(
    fun.data = "mean_sdl",  fun.args = list(mult = 1),
    geom = "pointrange", color = "black"
    )

print(scatter_violin)

scatter_violin1 <- ggplot(data=rawF2, aes(x=SPP, y=F.abs.res.D)) +
  geom_violin(trim = FALSE) + 
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="crossbar", fill="red", width=0.03)

print(scatter_violin1)

ggplot(rawF2, aes(SPP, F.abs.res.P1.R)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3) 

ggplot(rawF2, aes(SPP, F.abs.res.LLSC)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(rawF2, aes(SPP, F.abs.res.SBLL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(rawF2, aes(SPP, F.abs.res.BD)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(rawF2, aes(SPP, F.abs.res.CPD)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(rawF2, aes(SPP, F.abs.res.CPL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3) 

ggplot(rawF2, aes(SPP, F.abs.res.PreDL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(rawF2, aes(SPP, F.abs.res.DbL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(rawF2, aes(SPP, F.abs.res.HL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(rawF2, aes(SPP, F.abs.res.HD)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(rawF2, aes(SPP, F.abs.res.HW)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(rawF2, aes(SPP, F.abs.res.SnL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(rawF2, aes(SPP, F.abs.res.OL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

Comparing variation

Variance tests

  1. t-test for residuals, continuous (do an f-test too, might be cool)
  2. residual discrete will go through glmer
  3. raw discrete will go through f-test (no idea how to do this otherwise), and results will be compared to visualizations to check for discrepencies.

(note: did non-parametric instead of transforming, but Mike said to transform, so I copied that work to the ‘morphology-analysis-final’ Rmd).

F-test on residuals doesn’t make much sense; the residuals themselves are representative of the variation present, as they are the distance from the mean. Therefore, F-test on residuals is like variance of the variance. Instead, I have to do a t-test on the absolute value of the residuals. In this sense, we want to see if the mean of the absolute residuals is higher or lower for asexual species–is the average amount of variation higher or lower for this trait? Based on the regressions, if the trait was influenced by body size, I will perform a t-test on the absolute value of the residuals. If the trait was not influenced by body size, I will perform an F-test of variance on the raw data.

Quick results summary: For the F-test on raw data, none of the traits were significantly different (P2L/R, A, SBDF, FLA). For the t-tests on residuals, the only significant traits are left pectoral (), right pectoral (lat>form), scales above lateral line (), scales below lateral line (form>lat), and head length ().

-   will do two-tailed and check out the residual means to infer direction; for traits in which we use raw data, a one-tailed f-test will be perfomed in both direction to determine which species is varying more. We will also visulize the variation using a histogram to confirm direction results.

(1) Continuous, residuals

  • will do two-tailed and check out the means to infer direction (see beginning for trait means w/o body correction)
  • this will be for the traits that did have a significant regression compared to SL; will be using the absolute value residuals.
  • For continuous traits, we will be using the transformed data.
T-tests
## 
##  Welch Two Sample t-test
## 
## data:  F.abs.lat.BD and F.abs.form.BD
## t = -1.2189, df = 193.87, p-value = 0.2244
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.013073360  0.003086295
## sample estimates:
##  mean of x  mean of y 
## 0.03134439 0.03633792
## 
##  Welch Two Sample t-test
## 
## data:  F.abs.lat.CPD and F.abs.form.CPD
## t = 1.0166, df = 185.78, p-value = 0.3107
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.003082334  0.009636619
## sample estimates:
##  mean of x  mean of y 
## 0.03008961 0.02681247
## 
##  Welch Two Sample t-test
## 
## data:  F.abs.lat.CPL and F.abs.form.CPL
## t = 0.13032, df = 189.33, p-value = 0.8965
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.007234423  0.008257913
## sample estimates:
##  mean of x  mean of y 
## 0.03117200 0.03066026
## 
##  Welch Two Sample t-test
## 
## data:  F.abs.lat.PreDL and F.abs.form.PreDL
## t = -0.76279, df = 193.28, p-value = 0.4465
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.01397601  0.00618051
## sample estimates:
##  mean of x  mean of y 
## 0.04262162 0.04651937
## 
##  Welch Two Sample t-test
## 
## data:  F.abs.lat.DbL and F.abs.form.DbL
## t = 2.178, df = 193.03, p-value = 0.03062
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.0008534224 0.0172226518
## sample estimates:
##  mean of x  mean of y 
## 0.04095673 0.03191869
## 
##  Welch Two Sample t-test
## 
## data:  F.abs.lat.HL and F.abs.form.HL
## t = -0.062173, df = 185.3, p-value = 0.9505
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.007482424  0.007025226
## sample estimates:
##  mean of x  mean of y 
## 0.02983505 0.03006365
## 
##  Welch Two Sample t-test
## 
## data:  F.abs.lat.HD and F.abs.form.HD
## t = 0.43483, df = 189.82, p-value = 0.6642
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.005249085  0.008217706
## sample estimates:
##  mean of x  mean of y 
## 0.02970775 0.02822344
## 
##  Welch Two Sample t-test
## 
## data:  F.abs.lat.HW and F.abs.form.HW
## t = -0.72235, df = 193.22, p-value = 0.471
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.008804796  0.004084239
## sample estimates:
##  mean of x  mean of y 
## 0.02753679 0.02989707
## 
##  Welch Two Sample t-test
## 
## data:  F.abs.lat.SnL and F.abs.form.SnL
## t = -1.2885, df = 191.25, p-value = 0.1991
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.009954161  0.002087792
## sample estimates:
##  mean of x  mean of y 
## 0.02404323 0.02797642
## 
##  Welch Two Sample t-test
## 
## data:  F.abs.lat.OL and F.abs.form.OL
## t = -1.629, df = 110.39, p-value = 0.1062
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.910179e-17  3.819266e-18
## sample estimates:
##    mean of x    mean of y 
## 4.471660e-18 2.211292e-17

(2) Discrete, residuals

Logan suggested doing a glm with poisson distribution (because it is count data) for these relationships. I originally performed Mann Whitney U tests, so will also include that. Overall, results don’t differ even though exact values are slightly different.

Had to remove the poisson part, becausethe residuals didn’t meet the non-integer requirement for poisson (it freaked out and took forever to load).

GLMs
rawF3 <- rawF2[rawF2$SPP !="p.mexicana", ]

Fglm_D <- glm(F.abs.res.D~SPP, data=rawF3)
print(summary(Fglm_D), show.residuals=TRUE)
## 
## Call:
## glm(formula = F.abs.res.D ~ SPP, data = rawF3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.4962  -0.4088  -0.1003   0.2814   1.5729  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.50579    0.03791  13.343   <2e-16 ***
## SPPp.latipinna  0.06326    0.05608   1.128    0.261    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1537471)
## 
##     Null deviance: 30.176  on 196  degrees of freedom
## Residual deviance: 29.981  on 195  degrees of freedom
## AIC: 194.18
## 
## Number of Fisher Scoring iterations: 2
F.D.aov <- aov(Fglm_D)
summary(F.D.aov)
##              Df Sum Sq Mean Sq F value Pr(>F)
## SPP           1  0.196  0.1956   1.272  0.261
## Residuals   195 29.981  0.1537
Fglm_P1.R <- glm(F.abs.res.P1.R~SPP, data=rawF3)
print(summary(Fglm_P1.R), show.residuals=TRUE)
## 
## Call:
## glm(formula = F.abs.res.P1.R ~ SPP, data = rawF3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.4613  -0.3169  -0.1726   0.1641   1.5727  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.46441    0.03992  11.635   <2e-16 ***
## SPPp.latipinna  0.11380    0.05906   1.927   0.0554 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1704834)
## 
##     Null deviance: 33.877  on 196  degrees of freedom
## Residual deviance: 33.244  on 195  degrees of freedom
## AIC: 214.54
## 
## Number of Fisher Scoring iterations: 2
F.P1.R.aov <- aov(Fglm_P1.R)
summary(F.P1.R.aov)
##              Df Sum Sq Mean Sq F value Pr(>F)  
## SPP           1   0.63  0.6331   3.713 0.0554 .
## Residuals   195  33.24  0.1705                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Fglm_LLSC <- glm(F.abs.res.LLSC~SPP, data=rawF3)
print(summary(Fglm_LLSC), show.residuals=TRUE)
## 
## Call:
## glm(formula = F.abs.res.LLSC ~ SPP, data = rawF3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.7863  -0.4836  -0.2411   0.3615   3.0681  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.80243    0.06431  12.477   <2e-16 ***
## SPPp.latipinna -0.07077    0.09515  -0.744    0.458    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.4425906)
## 
##     Null deviance: 86.550  on 196  degrees of freedom
## Residual deviance: 86.305  on 195  degrees of freedom
## AIC: 402.47
## 
## Number of Fisher Scoring iterations: 2
F.LLSC.aov <- aov(Fglm_LLSC)
summary(F.LLSC.aov)
##              Df Sum Sq Mean Sq F value Pr(>F)
## SPP           1   0.24  0.2448   0.553  0.458
## Residuals   195  86.31  0.4426
Fglm_SBLL <- glm(F.abs.res.SBLL~SPP, data=rawF3)
print(summary(Fglm_SBLL), show.residuals=TRUE)
## 
## Call:
## glm(formula = F.abs.res.SBLL ~ SPP, data = rawF3)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.29641  -0.21484  -0.17624  -0.07458   1.55334  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.30326    0.03633   8.346 1.28e-14 ***
## SPPp.latipinna -0.02903    0.05376  -0.540     0.59    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1412584)
## 
##     Null deviance: 27.587  on 196  degrees of freedom
## Residual deviance: 27.545  on 195  degrees of freedom
## AIC: 177.49
## 
## Number of Fisher Scoring iterations: 2
F.SBLL.aov <- aov(Fglm_SBLL)
summary(F.SBLL.aov)
##              Df Sum Sq Mean Sq F value Pr(>F)
## SPP           1  0.041 0.04119   0.292   0.59
## Residuals   195 27.545 0.14126

(3) Discrete, raw

EVEN THOUGH THESE ARE DISCRETE, NO IDEA WHAT ELSE TO DO WITH THEM (glmm on raw data will just compare means). Did both F-test and Levene’s test. Get the same overall results, albeit slightly different values. Might go with Levene’s since it is good with non-normal data.

This will only be for the traits that did not have a significant regression compared to SL (so P2s, A, SALL, SBDF, and FLA); none of these traits were transformed, so just doing raw for now.

Levene’s test

Still only performing this on the traits that did NOT vary with SL (P2L/R, A, SALL, SBLL [between sail and amz only], SBDF, FLA).

(FLT_P2L <- leveneTest(P2.L~SPP, data=rawF3)) #gives nothing since it's all the same value
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1     NaN    NaN
##       195
(FLT_P2R <- leveneTest(P2.R~SPP, data=rawF3)) #same as above
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1     NaN    NaN
##       195
(FLT_P1 <- leveneTest(P1~SPP, data=rawF3)) 
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.0172 0.8959
##       195
(FLT_A <- leveneTest(A~SPP, data=rawF3)) 
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.9255 0.3372
##       195
(FLT_SALL <- leveneTest(SALL~SPP, data=rawF3))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value  Pr(>F)  
## group   1  4.7837 0.02992 *
##       195                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(FLT_SBDF <- leveneTest(SBDF~SPP, data=rawF3))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.0845 0.7716
##       195
(FLT_FLA <- leveneTest(FLA~SPP, data=rawF3))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.2675 0.6056
##       195

Summary of variance results

WHY WON’T THIS TABLE WORK!!!!!!!!

average.table <- matrix(c(MW_D\(p.value, mean(F.abs.lat.F.D, na.rm = TRUE), mean(F.abs.form.D, na.rm = TRUE), MW_P1\)p.value, mean(F.abs.lat.F.P1, na.rm = TRUE), mean(F.abs.form.P1, na.rm = TRUE), LT_P2R\("Pr(>F)", mean(lat.F\)P2.R, na.rm = TRUE), mean(form\(P2.R, na.rm = TRUE), LT_P2R\)“Pr(>F)”, mean(lat.F\(P2.L, na.rm = TRUE), mean(form\)P2.L, na.rm = TRUE), LT_A\("Pr(>F)", mean(lat.F\)A, na.rm = TRUE), mean(form\(A, na.rm = TRUE), MW_P1R\)p.value, mean(F.abs.lat.F.P1.R, na.rm = TRUE), mean(F.abs.form.P1.R, na.rm = TRUE), MW_LLSC\(p.value, mean(F.abs.lat.F.LLSC, na.rm = TRUE), mean(F.abs.form.LLSC, na.rm = TRUE), LT_SALL\)“Pr(>F)”, mean(lat.F\(SALL, na.rm = TRUE), mean(form\)SALL, na.rm = TRUE), LT_SBLL\("Pr(>F)", mean(F.abs.lat.F.SBLL, na.rm = TRUE), mean(F.abs.form.SBLL, na.rm = TRUE), LT_SBDF\)“Pr(>F)”, mean(lat.F\(SBDF, na.rm = TRUE), mean(form\)SBDF, na.rm = TRUE), ttest.F.abs.BD\(p.value, mean(F.abs.lat.F.BD, na.rm = TRUE), mean(F.abs.form.BD, na.rm = TRUE), ttest.F.abs.CPD\)p.value, mean(F.abs.lat.F.CPD, na.rm = TRUE), mean(F.abs.form.CPD, na.rm = TRUE), ttest.F.abs.CPL\(p.value, mean(F.abs.lat.F.CPL, na.rm = TRUE), mean(F.abs.form.CPL, na.rm = TRUE), ttest.F.abs.PreDL\)p.value, mean(F.abs.lat.F.PreDL, na.rm = TRUE), mean(F.abs.form.PreDL, na.rm = TRUE), ttest.F.abs.DbL\(p.value, mean(F.abs.lat.F.DbL, na.rm = TRUE), mean(F.abs.form.DbL, na.rm = TRUE), ttest.F.abs.HL\)p.value, mean(F.abs.lat.F.HL, na.rm = TRUE), mean(F.abs.form.HL, na.rm = TRUE), ttest.F.abs.HD\(p.value, mean(F.abs.lat.F.HD, na.rm = TRUE), mean(F.abs.form.HD, na.rm = TRUE), ttest.F.abs.HW\)p.value, mean(F.abs.lat.F.HW, na.rm = TRUE), mean(F.abs.form.HW, na.rm = TRUE), ttest.F.abs.SnL\(p.value, mean(F.abs.lat.F.SnL, na.rm = TRUE), mean(F.abs.form.SnL, na.rm = TRUE), ttest.F.abs.OL\)p.value, mean(F.abs.lat.F.OL, na.rm = TRUE), mean(F.abs.form.OL, na.rm = TRUE), LT_FLA\("Pr(>F)", mean(lat.F\)FLA, na.rm = TRUE), mean(form$FLA, na.rm = TRUE)), ncol=3, byrow=TRUE)

colnames(average.table)<- c(“p-value for variance”, “lat mean”, “form mean”)

rownames(average.table) <- c(“dorsal rays”, “L pect rays”, “R pelvic rays”, ”L pelvic rays”, “Anal rays”, “R pect rays”, ”lat line scales”, ”scales above LL”, ”scales below LL”, ”scales before dor”, “body dep”, “peduncle dep”, “peduncle leng”, “pre-dorsal”, “dorsal base”, “head length”, “head depth”, “head width”, “snout leng”, “orbital leng”, “fluct. asymmetries”)

average.table


Variance plots

plot_multi_histogram <- function(df, feature, label_column) {
    plt <- ggplot(df, aes(x=eval(parse(text=feature)), fill=eval(parse(text=label_column)))) +
    geom_histogram(bins=10, alpha=0.4, position="identity", aes(y = ..density..), color="black") +
    geom_density(alpha=0.2)  +
    labs(x=feature, y = "Density")
    plt + guides(fill=guide_legend(title=label_column))
}

plot_multi_histogram(rawF3, "F.abs.res.CPD", "SPP")

plot_multi_histogram(rawF3, "F.abs.res.DbL", "SPP")

plot_multi_histogram(rawF3, "F.abs.res.P1.R", "SPP")

PCA analysis

PCA

LOGAN: CHECK THAT EACH VARIABLES IS NEAR NORMALLY DISTRIBUTED. IF NOT, LOG TRANSFORM BEFORE PCA. ALSO CHECK THAT PCA CALCULATES Z SCORES AND PLOTS BASED ON THAT; IF NOT CONVERT TO Z SCORES THEN RUN PCA.

In this analysis, I will compare the principle components after centering and scaling the data. A PCA analysis will help us determine what aspects of morphology influence the variation in our data the most without worrying about differences in scales/measurements. Currently, data consists of 116 Sailfin and 186 Amazon.

Chart

(chart <- matrix(c("Variable chart:", "D: dorsal ray count", "P1: left pectoral ray count", "P2.R: right pelvic rays", "P2.L: left pelvic rays", "A: anal rays", "P1.R: right pectoral ray count", "LLSC: lateral line scale count", "SALL: scales above lateral line", "SBLL: scales below lateral line", "SBDF: scales before dorsal fin", "TL: total length", "SL: standard length", "BD: body depth", "CPD: caudal peduncle depth", "CPL: caudal peduncle length", "PreDL: pre-dorsal length", "DbL: dorsal base length", "HL/HW/HD: head length/width/depth", "SnL: snout length", "OL: ocular length"), ncol=1, byrow=TRUE))
##       [,1]                               
##  [1,] "Variable chart:"                  
##  [2,] "D: dorsal ray count"              
##  [3,] "P1: left pectoral ray count"      
##  [4,] "P2.R: right pelvic rays"          
##  [5,] "P2.L: left pelvic rays"           
##  [6,] "A: anal rays"                     
##  [7,] "P1.R: right pectoral ray count"   
##  [8,] "LLSC: lateral line scale count"   
##  [9,] "SALL: scales above lateral line"  
## [10,] "SBLL: scales below lateral line"  
## [11,] "SBDF: scales before dorsal fin"   
## [12,] "TL: total length"                 
## [13,] "SL: standard length"              
## [14,] "BD: body depth"                   
## [15,] "CPD: caudal peduncle depth"       
## [16,] "CPL: caudal peduncle length"      
## [17,] "PreDL: pre-dorsal length"         
## [18,] "DbL: dorsal base length"          
## [19,] "HL/HW/HD: head length/width/depth"
## [20,] "SnL: snout length"                
## [21,] "OL: ocular length"

Loadings

library(stringr)

rawFa <- subset(rawF, select = -c(17:18) ) #took out P2R and L since they don't vary at all
rawFa <- subset(rawFa, select=-c(34:49))#took out transformed values
rawFa <- raw1a[rawFa$SPP !="p.mexicana", ]

#PCA <- prcomp(raw1a[, 15:32], center=TRUE, scale. = TRUE) #includes all but pelvic traits, since they are the same in all species WILL SCALE/CENTER DATA ON MY OWN, LOGAN SAID IT IS NOT ALWAYS DOING IT FOR YOU EVEN WHEN YOU SPECIFY

z.scores <- scale(rawFa[, 15:33], center = TRUE, scale = TRUE)

rawFa <- cbind(rawFa, z.scores)

colnames(rawFa)[34:52] <- str_c( "z_", colnames(rawFa)[15:33] )


PCA.F <- prcomp(rawFa[, 34:52])

summary(PCA.F)
## Importance of components:
##                           PC1    PC2     PC3     PC4    PC5     PC6     PC7
## Standard deviation     3.1028 1.6016 1.15679 1.05508 0.9680 0.94482 0.84147
## Proportion of Variance 0.5056 0.1347 0.07027 0.05846 0.0492 0.04688 0.03718
## Cumulative Proportion  0.5056 0.6402 0.71052 0.76898 0.8182 0.86505 0.90224
##                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.60870 0.56941 0.53848 0.50918 0.42822 0.34262 0.30695
## Proportion of Variance 0.01946 0.01703 0.01523 0.01361 0.00963 0.00616 0.00495
## Cumulative Proportion  0.92169 0.93872 0.95395 0.96756 0.97719 0.98335 0.98830
##                           PC15    PC16    PC17    PC18    PC19
## Standard deviation     0.26586 0.22708 0.22045 0.20646 0.09644
## Proportion of Variance 0.00371 0.00271 0.00255 0.00224 0.00049
## Cumulative Proportion  0.99201 0.99472 0.99727 0.99951 1.00000
(loadings.F <- PCA.F$rotation[, 1:5])
##                  PC1          PC2          PC3          PC4          PC5
## z_D     -0.020520500  0.544109784  0.212346161  0.094684498  0.035653658
## z_P1    -0.015917759 -0.354670657  0.429869516  0.256842294 -0.077459561
## z_A     -0.003336565  0.005801433  0.580021327  0.154415902  0.390945314
## z_P1.R  -0.031160960 -0.380480371  0.339656568  0.314580254 -0.054571012
## z_LLSC  -0.076777432  0.300421059  0.201652388  0.021827943 -0.319355143
## z_SALL  -0.042381190 -0.104853664 -0.483772621  0.638729212  0.204452587
## z_SBLL  -0.053376456  0.115848339  0.022011922  0.374108565 -0.769280021
## z_SBDF  -0.014265974 -0.430711284 -0.051179456 -0.353024363 -0.290143578
## z_SL    -0.321153547 -0.047972509  0.006703134 -0.046057979  0.007745119
## z_BD    -0.311448185  0.084217071 -0.008769912  0.015047533  0.025405662
## z_CPD   -0.315419529 -0.014937246 -0.003236237 -0.003875229  0.031254179
## z_CPL   -0.308415182 -0.065325154  0.027617029 -0.003856951 -0.000421000
## z_PreDL -0.299462647 -0.183499969 -0.025680568 -0.064337625 -0.004310840
## z_DbL   -0.262671960  0.283490980  0.077036393  0.036627085  0.040305851
## z_HL    -0.286034092 -0.044846717  0.087838472 -0.218198058 -0.041700863
## z_HD    -0.312497960  0.017111348 -0.022319361 -0.103197599  0.010280730
## z_HW    -0.313507823 -0.039378201 -0.101404799  0.069469411  0.042162723
## z_SnL   -0.273281812 -0.023254867 -0.117887375  0.223533019  0.100989264
## z_OL    -0.285894911  0.029239373  0.025362472 -0.091188329  0.001490478
F.sorted.loadings.1 <- loadings[order(loadings[, 1]), 1]
myTitle <- "Loadings Plot for PC1" 
myXlab  <- "Variable Loadings"
dotchart(sorted.loadings.1, main=myTitle, xlab=myXlab, cex=1.5, col="red")

F.sorted.loadings.2 <- loadings[order(loadings[, 2]), 2]
myTitle <- "Loadings Plot for PC2" 
myXlab  <- "Variable Loadings"
dotchart(sorted.loadings.2, main=myTitle, xlab=myXlab, cex=1.5, col="red")

F.sorted.loadings.3 <- loadings[order(loadings[, 3]), 3]
myTitle <- "Loadings Plot for PC3" 
myXlab  <- "Variable Loadings"
dotchart(sorted.loadings.3, main=myTitle, xlab=myXlab, cex=1.5, col="red")

FVM_PCA <- varimax(PCA.F$rotation) 

FVM_PCA
## $loadings
## 
## Loadings:
##         PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11 PC12 PC13 PC14 PC15 PC16
## z_D          1                                                                
## z_P1                                 1                                        
## z_A                         -1                                                
## z_P1.R           1                                                            
## z_LLSC                           1                                            
## z_SALL               1                                                        
## z_SBLL                  -1                                                    
## z_SBDF                                       1                                
## z_SL                                                                          
## z_BD                                                                 -1       
## z_CPD                                                                         
## z_CPL                                                       1                 
## z_PreDL                                                                   -1  
## z_DbL                                                            1            
## z_HL                                                  -1                      
## z_HD    -1                                                                    
## z_HW                                                                          
## z_SnL                                   -1                                    
## z_OL                                             -1                           
##         PC17 PC18 PC19
## z_D                   
## z_P1                  
## z_A                   
## z_P1.R                
## z_LLSC                
## z_SALL                
## z_SBLL                
## z_SBDF                
## z_SL              -1  
## z_BD                  
## z_CPD    1            
## z_CPL                 
## z_PreDL               
## z_DbL                 
## z_HL                  
## z_HD                  
## z_HW         -1       
## z_SnL                 
## z_OL                  
## 
##                  PC1   PC2   PC3   PC4   PC5   PC6   PC7   PC8   PC9  PC10
## SS loadings    1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053
## Cumulative Var 0.053 0.105 0.158 0.211 0.263 0.316 0.368 0.421 0.474 0.526
##                 PC11  PC12  PC13  PC14  PC15  PC16  PC17  PC18  PC19
## SS loadings    1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053
## Cumulative Var 0.579 0.632 0.684 0.737 0.789 0.842 0.895 0.947 1.000
## 
## $rotmat
##              [,1]          [,2]         [,3]         [,4]         [,5]
##  [1,]  0.31249792 -0.0205204927 -0.031160966 -0.042381191  0.053376456
##  [2,] -0.01711135  0.5441099915 -0.380480416 -0.104853659 -0.115848340
##  [3,]  0.02231936  0.2123461835  0.339656595 -0.483772624 -0.022011921
##  [4,]  0.10319760  0.0946846611  0.314580290  0.638729212 -0.374108563
##  [5,] -0.01028073  0.0356537961 -0.054571039  0.204452588  0.769280023
##  [6,]  0.01785693  0.1138049945  0.333505981 -0.162866650  0.360588826
##  [7,]  0.04241562  0.0150191001 -0.049034727  0.352323302  0.334661481
##  [8,] -0.03663816  0.1056346369 -0.580674489  0.169686781  0.010823756
##  [9,] -0.10383393  0.1838928675  0.351450536  0.323168458  0.018930194
## [10,]  0.07351716  0.5773931611  0.180775790 -0.003222712  0.075151742
## [11,] -0.12972061  0.1817994549 -0.096055951  0.065526582 -0.038856063
## [12,]  0.01632720 -0.3540782874  0.006267311 -0.082774297  0.027013379
## [13,]  0.33099960  0.2211759356 -0.124511858  0.005537603 -0.015868468
## [14,]  0.30895210 -0.2033507166 -0.048905296  0.071346403 -0.014505627
## [15,] -0.60326912 -0.0001677204 -0.034841912  0.010989103 -0.009750056
## [16,] -0.38358390 -0.0116992547  0.016399216  0.006741568 -0.018612187
## [17,]  0.19201788  0.0635453499 -0.025034542 -0.018439830 -0.012063873
## [18,] -0.30901881  0.0511901172  0.021360494  0.050903335  0.004615888
## [19,] -0.07491462  0.0342424695 -0.014851551 -0.011616927  0.006376797
##               [,6]         [,7]         [,8]         [,9]       [,10]
##  [1,]  0.003336567 -0.076777432 -0.015917754  0.273281812 -0.01426598
##  [2,] -0.005801409  0.300421058 -0.354670601  0.023254866 -0.43071103
##  [3,] -0.580021348  0.201652387  0.429869465  0.117887375 -0.05117936
##  [4,] -0.154415920  0.021827942  0.256842249 -0.223533017 -0.35302431
##  [5,] -0.390945308 -0.319355143 -0.077459557 -0.100989264 -0.29014356
##  [6,]  0.678046778  0.122830092  0.322864236 -0.041264558 -0.30973564
##  [7,] -0.069551587  0.787265819  0.028279063 -0.016374122  0.33981671
##  [8,]  0.073822411 -0.129997900  0.683803003  0.238247042  0.07270999
##  [9,]  0.053101938 -0.116034400 -0.180462530  0.727295171  0.06490869
## [10,]  0.069167090 -0.233863909 -0.029095254 -0.251086265  0.57582026
## [11,] -0.036428953 -0.140425857  0.060614728  0.007991713  0.15181796
## [12,] -0.067362261  0.100952718 -0.037569110  0.316940134  0.06871165
## [13,]  0.003841896 -0.051004561 -0.009138624  0.210326848 -0.04090367
## [14,] -0.015130785 -0.073580474 -0.003112406 -0.220430759  0.13401581
## [15,] -0.020921891 -0.004326046  0.057605944  0.001628406  0.01665144
## [16,] -0.041177179 -0.028014708 -0.030574960 -0.031969747  0.04367637
## [17,] -0.023685447  0.015858269 -0.034436105  0.069724239  0.01237999
## [18,]  0.019201499  0.042347741 -0.020689686 -0.022953636 -0.01990296
## [19,]  0.006164639 -0.004395313  0.010626212 -0.014926716  0.01541649
##              [,11]         [,12]         [,13]       [,14]        [,15]
##  [1,]  0.285894911  0.2860340891 -0.3084151802 -0.26267196  0.311448230
##  [2,] -0.029239373  0.0448467183 -0.0653251554  0.28349098 -0.084217076
##  [3,] -0.025362472 -0.0878384731  0.0276170289  0.07703639  0.008769916
##  [4,]  0.091188329  0.2181980580 -0.0038569501  0.03662709 -0.015047517
##  [5,] -0.001490478  0.0417008643 -0.0004210003  0.04030585 -0.025405664
##  [6,]  0.112061200  0.0133294524  0.0941478531  0.10466690 -0.010301441
##  [7,]  0.049408071 -0.0438628581  0.0273782784 -0.08582446  0.029805688
##  [8,] -0.195765848  0.0001931758 -0.0967786187  0.05603172 -0.092374351
##  [9,] -0.194190983 -0.2704503373  0.0039371989  0.16163619 -0.024001315
## [10,] -0.105014717  0.3367474723 -0.1012714150  0.12096771 -0.066527030
## [11,]  0.858583315 -0.3452713601  0.0378111316  0.13943376 -0.067901769
## [12,]  0.215135787  0.5830419346  0.0373079295  0.44304522 -0.267402631
## [13,]  0.076788408  0.2511912660  0.6766249031 -0.22399552  0.079029908
## [14,] -0.099191418 -0.2013201581  0.3094939486  0.56121797  0.182132288
## [15,] -0.020341964  0.1952116469  0.1854368791  0.16948785  0.613753873
## [16,]  0.005784728  0.0670356297  0.4305343660 -0.35613586 -0.380032933
## [17,]  0.022014985 -0.0976595958  0.1043676620 -0.14621047  0.466206623
## [18,]  0.078638526  0.2356791223 -0.1786601132 -0.03129669  0.159487570
## [19,] -0.009325033  0.0019841796  0.2271475475  0.14397666  0.031508856
##              [,16]         [,17]       [,18]         [,19]
##  [1,]  0.299462660 -0.3154195267  0.31350782  0.3211535448
##  [2,]  0.183499967 -0.0149372463  0.03937820  0.0479725092
##  [3,]  0.025680568 -0.0032362367  0.10140480 -0.0067031341
##  [4,]  0.064337628 -0.0038752291 -0.06946941  0.0460579787
##  [5,]  0.004310839  0.0312541785 -0.04216272 -0.0077451191
##  [6,]  0.094321618  0.0235979294  0.01113854 -0.0007574018
##  [7,] -0.002910177 -0.0678981717  0.01303191  0.0107450003
##  [8,]  0.061978932  0.0733235456  0.01665991  0.0237263892
##  [9,]  0.032578934  0.0005690372  0.08018419 -0.0019378806
## [10,]  0.088485169  0.0114822904 -0.07746342  0.0153016666
## [11,] -0.024194833  0.0111232685 -0.03698910 -0.0576282508
## [12,]  0.082862835  0.1901776987 -0.21448486 -0.0313770543
## [13,] -0.325576318 -0.0881730361  0.07851685 -0.2719388677
## [14,]  0.278172557 -0.0025039575  0.45998890  0.0057360339
## [15,]  0.098199393 -0.3265824770 -0.15905999 -0.1032194987
## [16,]  0.539959477  0.0874086611  0.21586648  0.1969291889
## [17,]  0.298616144  0.6794561621 -0.36223927  0.0963089460
## [18,] -0.307574130  0.5173087578  0.63236455 -0.1146566387
## [19,] -0.417601103  0.0311785149 -0.04633338  0.8609702685
(eig.val.F <- get_eigenvalue(PCA.F))
##         eigenvalue variance.percent cumulative.variance.percent
## Dim.1  9.627322100      50.55524671                    50.55525
## Dim.2  2.565066911      13.46974674                    64.02499
## Dim.3  1.338171081       7.02703913                    71.05203
## Dim.4  1.113192421       5.84562528                    76.89766
## Dim.5  0.936930341       4.92003322                    81.81769
## Dim.6  0.892686259       4.68769753                    86.50539
## Dim.7  0.708077344       3.71827435                    90.22366
## Dim.8  0.370518648       1.94567726                    92.16934
## Dim.9  0.324232333       1.70261735                    93.87196
## Dim.10 0.289958908       1.52263984                    95.39460
## Dim.11 0.259260306       1.36143453                    96.75603
## Dim.12 0.183375968       0.96294870                    97.71898
## Dim.13 0.117387598       0.61642884                    98.33541
## Dim.14 0.094220052       0.49477081                    98.83018
## Dim.15 0.070682475       0.37116967                    99.20135
## Dim.16 0.051564134       0.27077493                    99.47212
## Dim.17 0.048596864       0.25519313                    99.72732
## Dim.18 0.042626489       0.22384134                    99.95116
## Dim.19 0.009300807       0.04884064                   100.00000

Plots

Component extractions
library(AMR) 
library(ggplot2)
library(ggfortify)

(PCA.F$sdev ^ 2)
##  [1] 9.627322100 2.565066911 1.338171081 1.113192421 0.936930341 0.892686259
##  [7] 0.708077344 0.370518648 0.324232333 0.289958908 0.259260306 0.183375968
## [13] 0.117387598 0.094220052 0.070682475 0.051564134 0.048596864 0.042626489
## [19] 0.009300807
evplot <- function(ev)
{
    # Broken stick model (MacArthur 1957)
    n <- length(ev)
    bsm <- data.frame(j=seq(1:n), p=0)
    bsm$p[1] <- 1/n
    for (i in 2:n) bsm$p[i] <- bsm$p[i-1] + (1/(n + 1 - i))
    bsm$p <- 100*bsm$p/n
    # Plot eigenvalues and % of variation for each axis
    op <- par(mfrow=c(2,1))
    barplot(ev, main="Eigenvalues", col="bisque", las=2)
    abline(h=mean(ev), col="red")
    legend("topright", "Average eigenvalue", lwd=1, col=2, bty="n")
    barplot(t(cbind(100*ev/sum(ev), bsm$p[n:1])), beside=TRUE, 
        main="% variation", col=c("bisque",2), las=2)
    legend("topright", c("% eigenvalue", "Broken stick model"), 
        pch=15, col=c("bisque",2), bty="n")
    par(op)
}


ev <- PCA.F$sdev^2 
evplot(ev) #according to Kaiser-Guttman criteron, we can use the first 4 PCs, even though the broken stick model shows only the first above the red bar plot... not 100% confident I know what this means, but pretty sure PC1 is body size

PCA 1v2
plotF1<- autoplot(PCA.F, data = rawFa, colour='SPP', loadings=FALSE, loadings.label=FALSE, frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal() 
plotF1

By zone
plotF2<- autoplot(PCA.F, data = rawFa, colour='QUARTILE', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal() 
plotF2

By Basin
plotF3<- autoplot(PCA.F, data = rawFa, colour='BASIN', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal() 
plotF3

By watershed
plotF4<- autoplot(PCA.F, data = rawFa, colour='WATERSHED', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal() 
plotF4
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse

PCA 2v3
plotF5<- autoplot(PCA.F, x=2, y=3, data = rawFa, colour='SPP', loadings=FALSE, loadings.label=FALSE, frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal()
plotF5

By zone
plotF6<- autoplot(PCA.F, x=2, y=3, data = rawFa, colour='QUARTILE', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal() 
plotF6

By basin
plotF7<- autoplot(PCA.F, x=2, y=3, data = rawFa, colour='BASIN', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal() 
plotF7

By watershed
plotF8<- autoplot(PCA.F, x=2, y=3, data = rawFa, colour='WATERSHED', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal() 
plotF8
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse







Tampico population

(1) Continuous, residuals

  • will do two-tailed and check out the means to infer direction (see beginning for trait means w/o body correction)
  • this will be for the traits that did have a significant regression compared to SL; will be using the absolute value residuals.
  • For continuous traits, we will be using the transformed data.
ANOVAs
Ftampico <- rawF2[rawF2$COUNTY.ST == "Tamaulipas/MX",]
Flat.mx <- Ftampico[Ftampico$SPP == "p.latipinna",]
Fform.mx <- Ftampico[Ftampico$SPP == "p.formosa",]
Fmex.mx <- Ftampico[Ftampico$SPP == "p.mexicana",]



MX.BD <- aov(F.abs.res.BD~SPP, data=Ftampico)
summary(MX.BD)
##             Df   Sum Sq   Mean Sq F value Pr(>F)
## SPP          2 0.000181 0.0000905   0.239  0.788
## Residuals   69 0.026122 0.0003786
MX.CPD <- aov(F.abs.res.CPD~SPP, data=Ftampico)
summary(MX.CPD)
##             Df   Sum Sq   Mean Sq F value Pr(>F)
## SPP          2 0.001447 0.0007235   1.769  0.178
## Residuals   69 0.028217 0.0004089
MX.CPL <- aov(F.abs.res.CPL~SPP, data=Ftampico)
summary(MX.CPL)
##             Df  Sum Sq   Mean Sq F value Pr(>F)
## SPP          2 0.00129 0.0006452   1.333   0.27
## Residuals   69 0.03339 0.0004839
MX.PreDL <- aov(F.abs.res.PreDL~SPP, data=Ftampico)
summary(MX.PreDL)
##             Df  Sum Sq  Mean Sq F value Pr(>F)
## SPP          2 0.00237 0.001186   0.695  0.503
## Residuals   69 0.11777 0.001707
MX.DbL <- aov(F.abs.res.DbL~SPP, data=Ftampico)
summary(MX.DbL)
##             Df  Sum Sq   Mean Sq F value Pr(>F)
## SPP          2 0.00159 0.0007967   1.275  0.286
## Residuals   69 0.04310 0.0006247
MX.HL <- aov(F.abs.res.HL~SPP, data=Ftampico)
summary(MX.HL)
##             Df  Sum Sq   Mean Sq F value Pr(>F)  
## SPP          2 0.00631 0.0031551   4.699 0.0122 *
## Residuals   69 0.04633 0.0006715                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
MX.HD <- aov(F.abs.res.HD~SPP, data=Ftampico)
summary(MX.HD)
##             Df  Sum Sq   Mean Sq F value Pr(>F)
## SPP          2 0.00136 0.0006819   1.462  0.239
## Residuals   69 0.03219 0.0004665
MX.HW <- aov(F.abs.res.HW~SPP, data=Ftampico)
summary(MX.HW)
##             Df   Sum Sq  Mean Sq F value Pr(>F)  
## SPP          2 0.002485 0.001242    2.78  0.069 .
## Residuals   69 0.030840 0.000447                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
MX.SnL <- aov(F.abs.res.SnL~SPP, data=Ftampico)
summary(MX.SnL)
##             Df  Sum Sq   Mean Sq F value Pr(>F)  
## SPP          2 0.00342 0.0017114   2.523 0.0876 .
## Residuals   69 0.04681 0.0006784                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
MX.OL <- aov(F.abs.res.OL~SPP, data=Ftampico)
summary(MX.OL)
##             Df    Sum Sq   Mean Sq F value   Pr(>F)    
## SPP          2 6.549e-34 3.275e-34   26.02 3.79e-09 ***
## Residuals   69 8.683e-34 1.260e-35                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(2) Discrete, residuals

Logan suggested doing a glm with poisson distribution (because it is count data) for these relationships. I originally performed Mann Whitney U tests, so will also include that. Overall, results don’t differ even though exact values are slightly different.

Had to remove the poisson part, becausethe residuals didn’t meet the non-integer requirement for poisson (it freaked out and took forever to load).

GLMs
FMX.glm_D <- glm(F.abs.res.D~SPP, data=Ftampico)
print(summary(FMX.glm_D), show.residuals=TRUE)
## 
## Call:
## glm(formula = F.abs.res.D ~ SPP, data = Ftampico)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.51391  -0.23043  -0.07983   0.03160   1.37970  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.52348    0.06794   7.705 6.91e-11 ***
## SPPp.latipinna -0.15297    0.09608  -1.592    0.116    
## SPPp.mexicana  -0.42956    0.09608  -4.471 2.98e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.11077)
## 
##     Null deviance: 9.9185  on 71  degrees of freedom
## Residual deviance: 7.6431  on 69  degrees of freedom
## AIC: 50.841
## 
## Number of Fisher Scoring iterations: 2
FMX.D.aov <- aov(FMX.glm_D)
summary(FMX.D.aov)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## SPP          2  2.275  1.1377   10.27 0.000125 ***
## Residuals   69  7.643  0.1108                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
FMX.glm_P1.R <- glm(F.abs.res.P1.R~SPP, data=Ftampico)
print(summary(FMX.glm_P1.R), show.residuals=TRUE)
## 
## Call:
## glm(formula = F.abs.res.P1.R ~ SPP, data = Ftampico)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.3770  -0.2437  -0.1199   0.1534   0.8284  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.38686    0.07117   5.436 7.73e-07 ***
## SPPp.latipinna  0.11951    0.10064   1.187   0.2391    
## SPPp.mexicana   0.18425    0.10064   1.831   0.0715 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1215512)
## 
##     Null deviance: 8.8064  on 71  degrees of freedom
## Residual deviance: 8.3870  on 69  degrees of freedom
## AIC: 57.529
## 
## Number of Fisher Scoring iterations: 2
FMX.P1.R.aov <- aov(FMX.glm_P1.R)
summary(FMX.P1.R.aov)
##             Df Sum Sq Mean Sq F value Pr(>F)
## SPP          2  0.419  0.2097   1.725  0.186
## Residuals   69  8.387  0.1216
FMX.glm_LLSC <- glm(F.abs.res.LLSC~SPP, data=Ftampico)
print(summary(FMX.glm_LLSC), show.residuals=TRUE)
## 
## Call:
## glm(formula = F.abs.res.LLSC ~ SPP, data = Ftampico)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.5060  -0.1993  -0.1323   0.1392   0.9289  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.59679    0.07544   7.911 2.91e-11 ***
## SPPp.latipinna -0.16351    0.10669  -1.533    0.130    
## SPPp.mexicana  -0.15647    0.10669  -1.467    0.147    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1365928)
## 
##     Null deviance: 9.8350  on 71  degrees of freedom
## Residual deviance: 9.4249  on 69  degrees of freedom
## AIC: 65.929
## 
## Number of Fisher Scoring iterations: 2
FMX.LLSC.aov <- aov(FMX.glm_LLSC)
summary(FMX.LLSC.aov)
##             Df Sum Sq Mean Sq F value Pr(>F)
## SPP          2  0.410  0.2051   1.501   0.23
## Residuals   69  9.425  0.1366
FMX.glm_SBLL <- glm(F.abs.res.SBLL~SPP, data=Ftampico)
print(summary(FMX.glm_SBLL), show.residuals=TRUE)
## 
## Call:
## glm(formula = F.abs.res.SBLL ~ SPP, data = Ftampico)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.28156  -0.18794  -0.08678  -0.02909   0.81846  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)   
## (Intercept)     0.16217    0.05838   2.778  0.00704 **
## SPPp.latipinna  0.18721    0.08256   2.268  0.02649 * 
## SPPp.mexicana   0.12392    0.08256   1.501  0.13792   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.08179103)
## 
##     Null deviance: 6.0789  on 71  degrees of freedom
## Residual deviance: 5.6436  on 69  degrees of freedom
## AIC: 29.005
## 
## Number of Fisher Scoring iterations: 2
FMX.SBLL.aov <- aov(FMX.glm_SBLL)
summary(FMX.SBLL.aov)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## SPP          2  0.435 0.21764   2.661  0.077 .
## Residuals   69  5.644 0.08179                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(3) Discrete, raw

EVEN THOUGH THESE ARE DISCRETE, NO IDEA WHAT ELSE TO DO WITH THEM (glmm on raw data will just compare means). Did both F-test and Levene’s test. Get the same overall results, albeit slightly different values. Might go with Levene’s since it is good with non-normal data.

This will only be for the traits that did not have a significant regression compared to SL (so P2s, A, SALL, SBDF, and FLA); none of these traits were transformed, so just doing raw for now.

Levene’s test

Still only performing this on the traits that did NOT vary with SL (P2L/R, A, SALL, SBLL [between sail and amz only], SBDF, FLA).

library(car)
library(carData)

(LT_P2L <- leveneTest(P2.L~SPP, data=Ftampico)) #gives nothing since it's all the same value
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2     NaN    NaN
##       69
(LT_P2R <- leveneTest(P2.R~SPP, data=Ftampico)) #same as above
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2     NaN    NaN
##       69
(LT_P1 <- leveneTest(P1~SPP, data=Ftampico))
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2  0.4544 0.6367
##       69
(LT_A <- leveneTest(A~SPP, data=Ftampico)) 
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2  0.4864 0.6169
##       69
(LT_SALL <- leveneTest(SALL~SPP, data=Ftampico))
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2  1.0299 0.3625
##       69
(LT_SBDF <- leveneTest(SBDF~SPP, data=Ftampico))
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2  1.8319 0.1678
##       69
(LT_FLA <- leveneTest(FLA~SPP, data=Ftampico))
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  2  2.9858 0.05706 .
##       69                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

other

test.lat.OL <- lm(lat$OL ~ lat$SL)
t.sd.lat.OL <- rstandard(reg.lat.OL)
test.lat.OL.plot <- ggplot(lat, aes(x = SL_log, y = OL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
test.lat.OL.plot
## `geom_smooth()` using formula = 'y ~ x'

test.form.OL <- lm(form$OL ~ form$SL)
t.sd.form.OL <- rstandard(reg.form.OL)
test.form.OL.plot <- ggplot(form, aes(x = SL_log, y = OL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
test.form.OL.plot
## `geom_smooth()` using formula = 'y ~ x'